From 8cdb4f70d3f117f0562205393ae28dd0e4efb2a9 Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Sun, 28 May 2023 20:57:01 +0300 Subject: [PATCH 001/125] move to one plugin wip --- go.mod | 2 + go.sum | 6 +- internal/servers/plugin/v0/plugin.go | 295 +++++++++++ plugin/benchmark_test.go | 429 ++++++++++++++++ plugin/docs.go | 242 +++++++++ plugin/docs_test.go | 164 ++++++ plugin/metrics.go | 125 +++++ plugin/metrics_test.go | 37 ++ plugin/options.go | 46 ++ plugin/plugin.go | 326 ++++++++++++ plugin/plugin_round_robin_test.go | 148 ++++++ plugin/plugin_test.go | 470 ++++++++++++++++++ plugin/scheduler.go | 163 ++++++ plugin/scheduler_dfs.go | 230 +++++++++ plugin/scheduler_round_robin.go | 104 ++++ plugin/scheduler_round_robin_test.go | 65 +++ plugin/templates/all_tables.md.go.tpl | 5 + plugin/templates/all_tables_entry.md.go.tpl | 5 + plugin/templates/table.md.go.tpl | 44 ++ .../TestGeneratePluginDocs-JSON-__tables.json | 214 ++++++++ .../TestGeneratePluginDocs-Markdown-README.md | 10 + ...tePluginDocs-Markdown-incremental_table.md | 20 + ...Docs-Markdown-relation_relation_table_a.md | 21 + ...Docs-Markdown-relation_relation_table_b.md | 21 + ...eratePluginDocs-Markdown-relation_table.md | 25 + ...tGeneratePluginDocs-Markdown-test_table.md | 29 ++ plugin/testing.go | 141 ++++++ plugin/validate.go | 27 + serve/plugin.go | 235 +++++++++ serve/plugin_test.go | 238 +++++++++ 30 files changed, 3883 insertions(+), 4 deletions(-) create mode 100644 internal/servers/plugin/v0/plugin.go create mode 100644 plugin/benchmark_test.go create mode 100644 plugin/docs.go create mode 100644 plugin/docs_test.go create mode 100644 plugin/metrics.go create mode 100644 plugin/metrics_test.go create mode 100644 plugin/options.go create mode 100644 plugin/plugin.go create mode 100644 plugin/plugin_round_robin_test.go create mode 100644 plugin/plugin_test.go create mode 100644 plugin/scheduler.go create mode 100644 plugin/scheduler_dfs.go create mode 100644 plugin/scheduler_round_robin.go create mode 100644 plugin/scheduler_round_robin_test.go create mode 100644 plugin/templates/all_tables.md.go.tpl create mode 100644 plugin/templates/all_tables_entry.md.go.tpl create mode 100644 plugin/templates/table.md.go.tpl create mode 100644 plugin/testdata/TestGeneratePluginDocs-JSON-__tables.json create mode 100644 plugin/testdata/TestGeneratePluginDocs-Markdown-README.md create mode 100644 plugin/testdata/TestGeneratePluginDocs-Markdown-incremental_table.md create mode 100644 plugin/testdata/TestGeneratePluginDocs-Markdown-relation_relation_table_a.md create mode 100644 plugin/testdata/TestGeneratePluginDocs-Markdown-relation_relation_table_b.md create mode 100644 plugin/testdata/TestGeneratePluginDocs-Markdown-relation_table.md create mode 100644 plugin/testdata/TestGeneratePluginDocs-Markdown-test_table.md create mode 100644 plugin/testing.go create mode 100644 plugin/validate.go create mode 100644 serve/plugin.go create mode 100644 serve/plugin_test.go diff --git a/go.mod b/go.mod index f86e63ca3c..d303bddb50 100644 --- a/go.mod +++ b/go.mod @@ -28,6 +28,8 @@ require ( replace github.com/apache/arrow/go/v13 => github.com/cloudquery/arrow/go/v13 v13.0.0-20230623001532-8366a2241e66 +replace github.com/cloudquery/plugin-pb-go => ../plugin-pb-go + require ( github.com/davecgh/go-spew v1.1.1 // indirect github.com/ghodss/yaml v1.0.0 // indirect diff --git a/go.sum b/go.sum index 7289c4e5dc..8f7dfaf7d1 100644 --- a/go.sum +++ b/go.sum @@ -40,10 +40,8 @@ github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWR github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= -github.com/cloudquery/arrow/go/v13 v13.0.0-20230623001532-8366a2241e66 h1:8eQrRKCk6OwCiIW43+Y10p2nkTdTATu5kqXEA7iBlg8= -github.com/cloudquery/arrow/go/v13 v13.0.0-20230623001532-8366a2241e66/go.mod h1:W69eByFNO0ZR30q1/7Sr9d83zcVZmF2MiP3fFYAWJOc= -github.com/cloudquery/plugin-pb-go v1.1.0 h1:F1r/x4aF5aO1hmgBk8rqAp2oejeYMMughTPaYosQLDk= -github.com/cloudquery/plugin-pb-go v1.1.0/go.mod h1:327Dd56bQ357KNIbhZNGDoJ7jPYXsCZWZ4Tj955gU7M= +github.com/cloudquery/arrow/go/v13 v13.0.0-20230509053643-898a79b1d3c8 h1:CmgLSEGQNLHpUQ5cU4L4aF7cuJZRnc1toIIWqC1gmPg= +github.com/cloudquery/arrow/go/v13 v13.0.0-20230509053643-898a79b1d3c8/go.mod h1:/XatdE3kDIBqZKhZ7OBUHwP2jaASDFZHqF4puOWM8po= github.com/cloudquery/plugin-sdk/v2 v2.7.0 h1:hRXsdEiaOxJtsn/wZMFQC9/jPfU1MeMK3KF+gPGqm7U= github.com/cloudquery/plugin-sdk/v2 v2.7.0/go.mod h1:pAX6ojIW99b/Vg4CkhnsGkRIzNaVEceYMR+Bdit73ug= github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= diff --git a/internal/servers/plugin/v0/plugin.go b/internal/servers/plugin/v0/plugin.go new file mode 100644 index 0000000000..d00b16059c --- /dev/null +++ b/internal/servers/plugin/v0/plugin.go @@ -0,0 +1,295 @@ +package plugin + +import ( + "bytes" + "context" + "encoding/json" + "errors" + "fmt" + "io" + "io/ioutil" + "os" + "path/filepath" + + "github.com/apache/arrow/go/v13/arrow" + "github.com/apache/arrow/go/v13/arrow/array" + "github.com/apache/arrow/go/v13/arrow/ipc" + "github.com/apache/arrow/go/v13/arrow/memory" + pb "github.com/cloudquery/plugin-pb-go/pb/plugin/v0" + "github.com/cloudquery/plugin-sdk/v3/plugin" + "github.com/cloudquery/plugin-sdk/v3/plugins/source" + "github.com/cloudquery/plugin-sdk/v3/scalar" + "github.com/cloudquery/plugin-sdk/v3/schema" + "github.com/getsentry/sentry-go" + "github.com/rs/zerolog" + "golang.org/x/sync/errgroup" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" + "google.golang.org/protobuf/proto" +) + +const MaxMsgSize = 100 * 1024 * 1024 // 100 MiB + +type Server struct { + pb.UnimplementedPluginServer + Plugin *plugin.Plugin + Logger zerolog.Logger + spec pb.Spec +} + +func (s *Server) GetStaticTables(context.Context, *pb.GetStaticTables_Request) (*pb.GetStaticTables_Response, error) { + tables := s.Plugin.StaticTables().ToArrowSchemas() + encoded, err := tables.Encode() + if err != nil { + return nil, fmt.Errorf("failed to encode tables: %w", err) + } + return &pb.GetStaticTables_Response{ + Tables: encoded, + }, nil +} + +func (s *Server) GetDynamicTables(context.Context, *pb.GetDynamicTables_Request) (*pb.GetDynamicTables_Response, error) { + // TODO: Fix this + tables := s.Plugin.StaticTables().ToArrowSchemas() + encoded, err := tables.Encode() + if err != nil { + return nil, fmt.Errorf("failed to encode tables: %w", err) + } + return &pb.GetDynamicTables_Response{ + Tables: encoded, + }, nil +} + +func (s *Server) GetName(context.Context, *pb.GetName_Request) (*pb.GetName_Response, error) { + return &pb.GetName_Response{ + Name: s.Plugin.Name(), + }, nil +} + +func (s *Server) GetVersion(context.Context, *pb.GetVersion_Request) (*pb.GetVersion_Response, error) { + return &pb.GetVersion_Response{ + Version: s.Plugin.Version(), + }, nil +} + +func (s *Server) Init(ctx context.Context, req *pb.Init_Request) (*pb.Init_Response, error) { + if err := s.Plugin.Init(ctx, *req.Spec); err != nil { + return nil, status.Errorf(codes.Internal, "failed to init plugin: %v", err) + } + s.spec = *req.Spec + return &pb.Init_Response{}, nil +} + +func (s *Server) Sync(req *pb.Sync_Request, stream pb.Plugin_SyncServer) error { + resources := make(chan *schema.Resource) + var syncErr error + ctx := stream.Context() + + go func() { + defer close(resources) + err := s.Plugin.Sync(ctx, req.SyncTime.AsTime(), *req.SyncSpec, resources) + if err != nil { + syncErr = fmt.Errorf("failed to sync resources: %w", err) + } + }() + + for resource := range resources { + vector := resource.GetValues() + bldr := array.NewRecordBuilder(memory.DefaultAllocator, resource.Table.ToArrowSchema()) + scalar.AppendToRecordBuilder(bldr, vector) + rec := bldr.NewRecord() + + var buf bytes.Buffer + w := ipc.NewWriter(&buf, ipc.WithSchema(rec.Schema())) + if err := w.Write(rec); err != nil { + return status.Errorf(codes.Internal, "failed to write record: %v", err) + } + if err := w.Close(); err != nil { + return status.Errorf(codes.Internal, "failed to close writer: %v", err) + } + + msg := &pb.Sync_Response{ + Resource: buf.Bytes(), + } + err := checkMessageSize(msg, resource) + if err != nil { + s.Logger.Warn().Str("table", resource.Table.Name). + Int("bytes", len(msg.String())). + Msg("Row exceeding max bytes ignored") + continue + } + if err := stream.Send(msg); err != nil { + return status.Errorf(codes.Internal, "failed to send resource: %v", err) + } + } + + return syncErr +} + +func (s *Server) GetMetrics(context.Context, *pb.GetMetrics_Request) (*pb.GetMetrics_Response, error) { + // Aggregate metrics before sending to keep response size small. + // Temporary fix for https://github.com/cloudquery/cloudquery/issues/3962 + m := s.Plugin.Metrics() + agg := &source.TableClientMetrics{} + for _, table := range m.TableClient { + for _, tableClient := range table { + agg.Resources += tableClient.Resources + agg.Errors += tableClient.Errors + agg.Panics += tableClient.Panics + } + } + b, err := json.Marshal(&source.Metrics{ + TableClient: map[string]map[string]*source.TableClientMetrics{"": {"": agg}}, + }) + if err != nil { + return nil, fmt.Errorf("failed to marshal source metrics: %w", err) + } + return &pb.GetMetrics_Response{ + Metrics: b, + }, nil +} + +func (s *Server) Migrate(ctx context.Context, req *pb.Migrate_Request) (*pb.Migrate_Response, error) { + schemas, err := schema.NewSchemasFromBytes(req.Tables) + if err != nil { + return nil, status.Errorf(codes.InvalidArgument, "failed to create schemas: %v", err) + } + tables, err := schema.NewTablesFromArrowSchemas(schemas) + if err != nil { + return nil, status.Errorf(codes.InvalidArgument, "failed to create tables: %v", err) + } + s.setPKsForTables(tables) + return &pb.Migrate_Response{}, s.Plugin.Migrate(ctx, tables) +} + +func (s *Server) Write(msg pb.Plugin_WriteServer) error { + resources := make(chan arrow.Record) + + r, err := msg.Recv() + if err != nil { + if err == io.EOF { + return msg.SendAndClose(&pb.Write_Response{}) + } + return status.Errorf(codes.Internal, "failed to receive msg: %v", err) + } + + schemas, err := schema.NewSchemasFromBytes(r.Tables) + if err != nil { + return status.Errorf(codes.InvalidArgument, "failed to create schemas: %v", err) + } + tables, err := schema.NewTablesFromArrowSchemas(schemas) + if err != nil { + return status.Errorf(codes.InvalidArgument, "failed to create tables: %v", err) + } + s.setPKsForTables(tables) + sourceSpec := *r.SourceSpec + syncTime := r.Timestamp.AsTime() + eg, ctx := errgroup.WithContext(msg.Context()) + eg.Go(func() error { + return s.Plugin.Write(ctx, sourceSpec, tables, syncTime, resources) + }) + + for { + r, err := msg.Recv() + if err == io.EOF { + close(resources) + if err := eg.Wait(); err != nil { + return status.Errorf(codes.Internal, "write failed: %v", err) + } + return msg.SendAndClose(&pb.Write_Response{}) + } + if err != nil { + close(resources) + if wgErr := eg.Wait(); wgErr != nil { + return status.Errorf(codes.Internal, "failed to receive msg: %v and write failed: %v", err, wgErr) + } + return status.Errorf(codes.Internal, "failed to receive msg: %v", err) + } + rdr, err := ipc.NewReader(bytes.NewReader(r.Resource)) + if err != nil { + close(resources) + if wgErr := eg.Wait(); wgErr != nil { + return status.Errorf(codes.InvalidArgument, "failed to create reader: %v and write failed: %v", err, wgErr) + } + return status.Errorf(codes.InvalidArgument, "failed to create reader: %v", err) + } + for rdr.Next() { + rec := rdr.Record() + rec.Retain() + select { + case resources <- rec: + case <-ctx.Done(): + close(resources) + if err := eg.Wait(); err != nil { + return status.Errorf(codes.Internal, "Context done: %v and failed to wait for plugin: %v", ctx.Err(), err) + } + return status.Errorf(codes.Internal, "Context done: %v", ctx.Err()) + } + } + if err := rdr.Err(); err != nil { + return status.Errorf(codes.InvalidArgument, "failed to read resource: %v", err) + } + } +} + +func (s *Server) GenDocs(req *pb.GenDocs_Request, srv pb.Plugin_GenDocsServer) error { + tmpDir := os.TempDir() + defer os.RemoveAll(tmpDir) + err := s.Plugin.GeneratePluginDocs(s.Plugin.StaticTables(), tmpDir, req.Format) + if err != nil { + return fmt.Errorf("failed to generate docs: %w", err) + } + + // list files in tmpDir + files, err := ioutil.ReadDir(tmpDir) + if err != nil { + return fmt.Errorf("failed to read tmp dir: %w", err) + } + for _, f := range files { + if f.IsDir() { + continue + } + content, err := os.ReadFile(filepath.Join(tmpDir, f.Name())) + if err != nil { + return fmt.Errorf("failed to read file: %w", err) + } + if err := srv.Send(&pb.GenDocs_Response{ + Filename: f.Name(), + Content: content, + }); err != nil { + return fmt.Errorf("failed to send file: %w", err) + } + } + return nil +} + +func checkMessageSize(msg proto.Message, resource *schema.Resource) error { + size := proto.Size(msg) + // log error to Sentry if row exceeds half of the max size + if size > MaxMsgSize/2 { + sentry.WithScope(func(scope *sentry.Scope) { + scope.SetTag("table", resource.Table.Name) + scope.SetExtra("bytes", size) + sentry.CurrentHub().CaptureMessage("Large message detected") + }) + } + if size > MaxMsgSize { + return errors.New("message exceeds max size") + } + return nil +} + +func (s *Server) setPKsForTables(tables schema.Tables) { + if s.spec.WriteSpec.PkMode == pb.WriteSpec_CQ_ID_ONLY { + setCQIDAsPrimaryKeysForTables(tables) + } +} + +func setCQIDAsPrimaryKeysForTables(tables schema.Tables) { + for _, table := range tables { + for i, col := range table.Columns { + table.Columns[i].PrimaryKey = col.Name == schema.CqIDColumn.Name + } + setCQIDAsPrimaryKeysForTables(table.Relations) + } +} \ No newline at end of file diff --git a/plugin/benchmark_test.go b/plugin/benchmark_test.go new file mode 100644 index 0000000000..36a86cd3cd --- /dev/null +++ b/plugin/benchmark_test.go @@ -0,0 +1,429 @@ +package plugin + +import ( + "context" + "fmt" + "math/rand" + "sync" + "sync/atomic" + "testing" + "time" + + "github.com/apache/arrow/go/v13/arrow" + "github.com/cloudquery/plugin-pb-go/specs" + "github.com/cloudquery/plugin-sdk/v3/schema" + "github.com/rs/zerolog" + "golang.org/x/sync/errgroup" +) + +type BenchmarkScenario struct { + Client Client + Scheduler specs.Scheduler + Clients int + Tables int + ChildrenPerTable int + Columns int + ColumnResolvers int // number of columns with custom resolvers + ResourcesPerTable int + ResourcesPerPage int + NoPreResourceResolver bool + Concurrency uint64 +} + +func (s *BenchmarkScenario) SetDefaults() { + if s.Clients == 0 { + s.Clients = 1 + } + if s.Tables == 0 { + s.Tables = 1 + } + if s.Columns == 0 { + s.Columns = 10 + } + if s.ResourcesPerTable == 0 { + s.ResourcesPerTable = 100 + } + if s.ResourcesPerPage == 0 { + s.ResourcesPerPage = 10 + } +} + +type ClientTest interface { + Call(clientID, tableName string) error +} + +type Benchmark struct { + *BenchmarkScenario + + b *testing.B + tables []*schema.Table + plugin *Plugin + + apiCalls atomic.Int64 +} + +func NewBenchmark(b *testing.B, scenario BenchmarkScenario) *Benchmark { + scenario.SetDefaults() + sb := &Benchmark{ + BenchmarkScenario: &scenario, + b: b, + tables: nil, + plugin: nil, + } + sb.setup(b) + return sb +} + +func (s *Benchmark) setup(b *testing.B) { + createResolvers := func(tableName string) (schema.TableResolver, schema.RowResolver, schema.ColumnResolver) { + tableResolver := func(ctx context.Context, meta schema.ClientMeta, parent *schema.Resource, res chan<- any) error { + total := 0 + for total < s.ResourcesPerTable { + s.simulateAPICall(meta.ID(), tableName) + num := min(s.ResourcesPerPage, s.ResourcesPerTable-total) + resources := make([]struct { + Column1 string + }, num) + for i := 0; i < num; i++ { + resources[i] = struct { + Column1 string + }{ + Column1: "test-column", + } + } + res <- resources + total += num + } + return nil + } + preResourceResolver := func(ctx context.Context, meta schema.ClientMeta, resource *schema.Resource) error { + s.simulateAPICall(meta.ID(), tableName) + resource.Item = struct { + Column1 string + }{ + Column1: "test-pre", + } + return nil + } + columnResolver := func(ctx context.Context, meta schema.ClientMeta, resource *schema.Resource, c schema.Column) error { + s.simulateAPICall(meta.ID(), tableName) + return resource.Set(c.Name, "test") + } + return tableResolver, preResourceResolver, columnResolver + } + + s.tables = make([]*schema.Table, s.Tables) + for i := 0; i < s.Tables; i++ { + tableResolver, preResourceResolver, columnResolver := createResolvers(fmt.Sprintf("table%d", i)) + columns := make([]schema.Column, s.Columns) + for u := 0; u < s.Columns; u++ { + columns[u] = schema.Column{ + Name: fmt.Sprintf("column%d", u), + Type: arrow.BinaryTypes.String, + } + if u < s.ColumnResolvers { + columns[u].Resolver = columnResolver + } + } + relations := make([]*schema.Table, s.ChildrenPerTable) + for u := 0; u < s.ChildrenPerTable; u++ { + relations[u] = &schema.Table{ + Name: fmt.Sprintf("table%d_child%d", i, u), + Columns: columns, + Resolver: tableResolver, + } + if !s.NoPreResourceResolver { + relations[u].PreResourceResolver = preResourceResolver + } + } + s.tables[i] = &schema.Table{ + Name: fmt.Sprintf("table%d", i), + Columns: columns, + Relations: relations, + Resolver: tableResolver, + Multiplex: nMultiplexer(s.Clients), + } + if !s.NoPreResourceResolver { + s.tables[i].PreResourceResolver = preResourceResolver + } + for u := range relations { + relations[u].Parent = s.tables[i] + } + } + + plugin := NewPlugin( + "testPlugin", + "1.0.0", + s.tables, + newTestExecutionClient, + ) + plugin.SetLogger(zerolog.New(zerolog.NewTestWriter(b)).Level(zerolog.WarnLevel)) + s.plugin = plugin + s.b = b +} + +func (s *Benchmark) simulateAPICall(clientID, tableName string) { + for { + s.apiCalls.Add(1) + err := s.Client.Call(clientID, tableName) + if err == nil { + // if no error, we are done + break + } + // if error, we have to retry + // we simulate a random backoff + time.Sleep(time.Duration(rand.Intn(100)) * time.Millisecond) + } +} + +func min(a, b int) int { + if a < b { + return a + } + return b +} + +func (s *Benchmark) Run() { + for n := 0; n < s.b.N; n++ { + s.b.StopTimer() + ctx := context.Background() + spec := specs.Source{ + Name: "testSource", + Path: "cloudquery/testSource", + Tables: []string{"*"}, + Version: "v1.0.0", + Destinations: []string{"test"}, + Concurrency: s.Concurrency, + Scheduler: s.Scheduler, + } + if err := s.plugin.Init(ctx, spec); err != nil { + s.b.Fatal(err) + } + resources := make(chan *schema.Resource) + g, ctx := errgroup.WithContext(ctx) + g.Go(func() error { + defer close(resources) + return s.plugin.Sync(ctx, + time.Now(), + resources) + }) + s.b.StartTimer() + start := time.Now() + + totalResources := 0 + for range resources { + // read resources channel until empty + totalResources++ + } + if err := g.Wait(); err != nil { + s.b.Fatal(err) + } + + end := time.Now() + s.b.ReportMetric(0, "ns/op") // drop default ns/op output + s.b.ReportMetric(float64(totalResources)/(end.Sub(start).Seconds()), "resources/s") + + // Enable the below metrics for more verbose information about the scenario: + // s.b.ReportMetric(float64(s.apiCalls.Load())/(end.Sub(start).Seconds()), "api-calls/s") + // s.b.ReportMetric(float64(totalResources), "resources") + // s.b.ReportMetric(float64(s.apiCalls.Load()), "apiCalls") + } +} + +type benchmarkClient struct { + num int +} + +func (b benchmarkClient) ID() string { + return fmt.Sprintf("client%d", b.num) +} + +func nMultiplexer(n int) schema.Multiplexer { + return func(meta schema.ClientMeta) []schema.ClientMeta { + clients := make([]schema.ClientMeta, n) + for i := 0; i < n; i++ { + clients[i] = benchmarkClient{ + num: i, + } + } + return clients + } +} + +func BenchmarkDefaultConcurrencyDFS(b *testing.B) { + benchmarkWithScheduler(b, specs.SchedulerDFS) +} + +func BenchmarkDefaultConcurrencyRoundRobin(b *testing.B) { + benchmarkWithScheduler(b, specs.SchedulerRoundRobin) +} + +func benchmarkWithScheduler(b *testing.B, scheduler specs.Scheduler) { + b.ReportAllocs() + minTime := 1 * time.Millisecond + mean := 10 * time.Millisecond + stdDev := 100 * time.Millisecond + client := NewDefaultClient(minTime, mean, stdDev) + bs := BenchmarkScenario{ + Client: client, + Clients: 25, + Tables: 5, + Columns: 10, + ColumnResolvers: 1, + ResourcesPerTable: 100, + ResourcesPerPage: 50, + Scheduler: scheduler, + } + sb := NewBenchmark(b, bs) + sb.Run() +} + +func BenchmarkTablesWithChildrenDFS(b *testing.B) { + benchmarkTablesWithChildrenScheduler(b, specs.SchedulerDFS) +} + +func BenchmarkTablesWithChildrenRoundRobin(b *testing.B) { + benchmarkTablesWithChildrenScheduler(b, specs.SchedulerRoundRobin) +} + +func benchmarkTablesWithChildrenScheduler(b *testing.B, scheduler specs.Scheduler) { + b.ReportAllocs() + minTime := 1 * time.Millisecond + mean := 10 * time.Millisecond + stdDev := 100 * time.Millisecond + client := NewDefaultClient(minTime, mean, stdDev) + bs := BenchmarkScenario{ + Client: client, + Clients: 2, + Tables: 2, + ChildrenPerTable: 2, + Columns: 10, + ColumnResolvers: 1, + ResourcesPerTable: 100, + ResourcesPerPage: 50, + Scheduler: scheduler, + } + sb := NewBenchmark(b, bs) + sb.Run() +} + +type DefaultClient struct { + min, stdDev, mean time.Duration +} + +func NewDefaultClient(min, mean, stdDev time.Duration) *DefaultClient { + if min == 0 { + min = time.Millisecond + } + if mean == 0 { + mean = 10 * time.Millisecond + } + if stdDev == 0 { + stdDev = 100 * time.Millisecond + } + return &DefaultClient{ + min: min, + mean: mean, + stdDev: stdDev, + } +} + +func (c *DefaultClient) Call(_, _ string) error { + sample := int(rand.NormFloat64()*float64(c.stdDev) + float64(c.mean)) + duration := time.Duration(sample) + if duration < c.min { + duration = c.min + } + time.Sleep(duration) + return nil +} + +type RateLimitClient struct { + *DefaultClient + calls map[string][]time.Time + callsLock sync.Mutex + window time.Duration + maxCallsPerWindow int +} + +func NewRateLimitClient(min, mean, stdDev time.Duration, maxCallsPerWindow int, window time.Duration) *RateLimitClient { + return &RateLimitClient{ + DefaultClient: NewDefaultClient(min, mean, stdDev), + calls: map[string][]time.Time{}, + window: window, + maxCallsPerWindow: maxCallsPerWindow, + } +} + +func (r *RateLimitClient) Call(clientID, table string) error { + // this will sleep for the appropriate amount of time before responding + err := r.DefaultClient.Call(clientID, table) + if err != nil { + return err + } + + r.callsLock.Lock() + defer r.callsLock.Unlock() + + // limit the number of calls per window by table + key := table + + // remove calls from outside the call window + updated := make([]time.Time, 0, len(r.calls[key])) + for i := range r.calls[key] { + if time.Since(r.calls[key][i]) < r.window { + updated = append(updated, r.calls[key][i]) + } + } + + // return error if we've exceeded the max calls in the time window + if len(updated) >= r.maxCallsPerWindow { + return fmt.Errorf("rate limit exceeded") + } + + r.calls[key] = append(r.calls[key], time.Now()) + return nil +} + +// BenchmarkDefaultConcurrency represents a benchmark scenario where rate limiting is applied +// by the cloud provider. In this rate limiter, the limit is applied globally per table. +// This mirrors the behavior of GCP, where rate limiting is applied per project *token*, not +// per project. A good scheduler should spread the load across tables so that other tables can make +// progress while waiting for the rate limit to reset. +func BenchmarkTablesWithRateLimitingDFS(b *testing.B) { + benchmarkTablesWithRateLimitingScheduler(b, specs.SchedulerDFS) +} + +func BenchmarkTablesWithRateLimitingRoundRobin(b *testing.B) { + benchmarkTablesWithRateLimitingScheduler(b, specs.SchedulerRoundRobin) +} + +// In this benchmark, we set up a scenario where each table has a global rate limit of 1 call per 100ms. +// Every table requires 1 call to resolve, and has 10 clients. This means, at best, each table can resolve in 1 second. +// We have 100 such tables and a concurrency that allows 1000 calls at a time. A good scheduler for this scenario +// should be able to resolve all tables in a bit more than 1 second. +func benchmarkTablesWithRateLimitingScheduler(b *testing.B, scheduler specs.Scheduler) { + b.ReportAllocs() + minTime := 1 * time.Millisecond + mean := 1 * time.Millisecond + stdDev := 1 * time.Millisecond + maxCallsPerWindow := 1 + window := 100 * time.Millisecond + c := NewRateLimitClient(minTime, mean, stdDev, maxCallsPerWindow, window) + + bs := BenchmarkScenario{ + Client: c, + Scheduler: scheduler, + Clients: 10, + Tables: 100, + ChildrenPerTable: 0, + Columns: 10, + ColumnResolvers: 0, + ResourcesPerTable: 1, + ResourcesPerPage: 1, + Concurrency: 1000, + NoPreResourceResolver: true, + } + sb := NewBenchmark(b, bs) + sb.Run() +} diff --git a/plugin/docs.go b/plugin/docs.go new file mode 100644 index 0000000000..5827e5edcf --- /dev/null +++ b/plugin/docs.go @@ -0,0 +1,242 @@ +package plugin + +import ( + "bytes" + "embed" + "encoding/json" + "fmt" + "os" + "path/filepath" + "regexp" + "sort" + "text/template" + + pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v0" + "github.com/cloudquery/plugin-sdk/v3/caser" + "github.com/cloudquery/plugin-sdk/v3/schema" +) + +//go:embed templates/*.go.tpl +var templatesFS embed.FS + +var reMatchNewlines = regexp.MustCompile(`\n{3,}`) +var reMatchHeaders = regexp.MustCompile(`(#{1,6}.+)\n+`) + +var DefaultTitleExceptions = map[string]string{ + // common abbreviations + "acl": "ACL", + "acls": "ACLs", + "api": "API", + "apis": "APIs", + "ca": "CA", + "cidr": "CIDR", + "cidrs": "CIDRs", + "db": "DB", + "dbs": "DBs", + "dhcp": "DHCP", + "iam": "IAM", + "iot": "IOT", + "ip": "IP", + "ips": "IPs", + "ipv4": "IPv4", + "ipv6": "IPv6", + "mfa": "MFA", + "ml": "ML", + "oauth": "OAuth", + "vpc": "VPC", + "vpcs": "VPCs", + "vpn": "VPN", + "vpns": "VPNs", + "waf": "WAF", + "wafs": "WAFs", + + // cloud providers + "aws": "AWS", + "gcp": "GCP", +} + +func DefaultTitleTransformer(table *schema.Table) string { + if table.Title != "" { + return table.Title + } + csr := caser.New(caser.WithCustomExceptions(DefaultTitleExceptions)) + return csr.ToTitle(table.Name) +} + +func sortTables(tables schema.Tables) { + sort.SliceStable(tables, func(i, j int) bool { + return tables[i].Name < tables[j].Name + }) + + for _, table := range tables { + sortTables(table.Relations) + } +} + +type templateData struct { + PluginName string + Tables schema.Tables +} + +// GeneratePluginDocs creates table documentation for the source plugin based on its list of tables +func (p *Plugin) GeneratePluginDocs(tables schema.Tables, dir string, format pbPlugin.GenDocs_FORMAT) error { + if err := os.MkdirAll(dir, os.ModePerm); err != nil { + return err + } + + setDestinationManagedCqColumns(tables) + + sortedTables := make(schema.Tables, 0, len(tables)) + for _, t := range tables { + sortedTables = append(sortedTables, t.Copy(nil)) + } + sortTables(sortedTables) + + switch format { + case pbPlugin.GenDocs_FORMAT_MARKDOWN: + return p.renderTablesAsMarkdown(dir, p.name, sortedTables) + case pbPlugin.GenDocs_FORMAT_JSON: + return p.renderTablesAsJSON(dir, sortedTables) + default: + return fmt.Errorf("unsupported format: %v", format) + } +} + +// setDestinationManagedCqColumns overwrites or adds the CQ columns that are managed by the destination plugins (_cq_sync_time, _cq_source_name). +func setDestinationManagedCqColumns(tables []*schema.Table) { + for _, table := range tables { + table.OverwriteOrAddColumn(&schema.CqSyncTimeColumn) + table.OverwriteOrAddColumn(&schema.CqSourceNameColumn) + setDestinationManagedCqColumns(table.Relations) + } +} + +type jsonTable struct { + Name string `json:"name"` + Title string `json:"title"` + Description string `json:"description"` + Columns []jsonColumn `json:"columns"` + Relations []jsonTable `json:"relations"` +} + +type jsonColumn struct { + Name string `json:"name"` + Type string `json:"type"` + IsPrimaryKey bool `json:"is_primary_key,omitempty"` + IsIncrementalKey bool `json:"is_incremental_key,omitempty"` +} + +func (p *Plugin) renderTablesAsJSON(dir string, tables schema.Tables) error { + jsonTables := p.jsonifyTables(tables) + buffer := &bytes.Buffer{} + m := json.NewEncoder(buffer) + m.SetIndent("", " ") + m.SetEscapeHTML(false) + err := m.Encode(jsonTables) + if err != nil { + return err + } + outputPath := filepath.Join(dir, "__tables.json") + return os.WriteFile(outputPath, buffer.Bytes(), 0644) +} + +func (p *Plugin) jsonifyTables(tables schema.Tables) []jsonTable { + jsonTables := make([]jsonTable, len(tables)) + for i, table := range tables { + jsonColumns := make([]jsonColumn, len(table.Columns)) + for c, col := range table.Columns { + jsonColumns[c] = jsonColumn{ + Name: col.Name, + Type: col.Type.String(), + IsPrimaryKey: col.PrimaryKey, + IsIncrementalKey: col.IncrementalKey, + } + } + jsonTables[i] = jsonTable{ + Name: table.Name, + Title: p.titleTransformer(table), + Description: table.Description, + Columns: jsonColumns, + Relations: p.jsonifyTables(table.Relations), + } + } + return jsonTables +} + +func (p *Plugin) renderTablesAsMarkdown(dir string, pluginName string, tables schema.Tables) error { + for _, table := range tables { + if err := p.renderAllTables(table, dir); err != nil { + return err + } + } + t, err := template.New("all_tables.md.go.tpl").Funcs(template.FuncMap{ + "indentToDepth": indentToDepth, + }).ParseFS(templatesFS, "templates/all_tables*.md.go.tpl") + if err != nil { + return fmt.Errorf("failed to parse template for README.md: %v", err) + } + + var b bytes.Buffer + if err := t.Execute(&b, templateData{PluginName: pluginName, Tables: tables}); err != nil { + return fmt.Errorf("failed to execute template: %v", err) + } + content := formatMarkdown(b.String()) + outputPath := filepath.Join(dir, "README.md") + f, err := os.Create(outputPath) + if err != nil { + return fmt.Errorf("failed to create file %v: %v", outputPath, err) + } + f.WriteString(content) + return nil +} + +func (p *Plugin) renderAllTables(t *schema.Table, dir string) error { + if err := p.renderTable(t, dir); err != nil { + return err + } + for _, r := range t.Relations { + if err := p.renderAllTables(r, dir); err != nil { + return err + } + } + return nil +} + +func (p *Plugin) renderTable(table *schema.Table, dir string) error { + t := template.New("").Funcs(map[string]any{ + "title": p.titleTransformer, + }) + t, err := t.New("table.md.go.tpl").ParseFS(templatesFS, "templates/table.md.go.tpl") + if err != nil { + return fmt.Errorf("failed to parse template: %v", err) + } + + outputPath := filepath.Join(dir, fmt.Sprintf("%s.md", table.Name)) + + var b bytes.Buffer + if err := t.Execute(&b, table); err != nil { + return fmt.Errorf("failed to execute template: %v", err) + } + content := formatMarkdown(b.String()) + f, err := os.Create(outputPath) + if err != nil { + return fmt.Errorf("failed to create file %v: %v", outputPath, err) + } + f.WriteString(content) + return f.Close() +} + +func formatMarkdown(s string) string { + s = reMatchNewlines.ReplaceAllString(s, "\n\n") + return reMatchHeaders.ReplaceAllString(s, `$1`+"\n\n") +} + +func indentToDepth(table *schema.Table) string { + s := "" + t := table + for t.Parent != nil { + s += " " + t = t.Parent + } + return s +} diff --git a/plugin/docs_test.go b/plugin/docs_test.go new file mode 100644 index 0000000000..44e7b34afd --- /dev/null +++ b/plugin/docs_test.go @@ -0,0 +1,164 @@ +//go:build !windows + +package plugin + +import ( + "os" + "path" + "testing" + + "github.com/apache/arrow/go/v13/arrow" + "github.com/bradleyjkemp/cupaloy/v2" + "github.com/cloudquery/plugin-sdk/v3/schema" + "github.com/cloudquery/plugin-sdk/v3/types" + "github.com/stretchr/testify/require" +) + +var testTables = []*schema.Table{ + { + Name: "test_table", + Description: "Description for test table", + Columns: []schema.Column{ + { + Name: "int_col", + Type: arrow.PrimitiveTypes.Int64, + }, + { + Name: "id_col", + Type: arrow.PrimitiveTypes.Int64, + PrimaryKey: true, + }, + { + Name: "id_col2", + Type: arrow.PrimitiveTypes.Int64, + PrimaryKey: true, + }, + { + Name: "json_col", + Type: types.ExtensionTypes.JSON, + }, + { + Name: "list_col", + Type: arrow.ListOf(arrow.PrimitiveTypes.Int64), + }, + { + Name: "map_col", + Type: arrow.MapOf(arrow.BinaryTypes.String, arrow.PrimitiveTypes.Int64), + }, + { + Name: "struct_col", + Type: arrow.StructOf(arrow.Field{Name: "string_field", Type: arrow.BinaryTypes.String}, arrow.Field{Name: "int_field", Type: arrow.PrimitiveTypes.Int64}), + }, + }, + Relations: []*schema.Table{ + { + Name: "relation_table", + Description: "Description for relational table", + Columns: []schema.Column{ + { + Name: "string_col", + Type: arrow.BinaryTypes.String, + }, + }, + Relations: []*schema.Table{ + { + Name: "relation_relation_table_b", + Description: "Description for relational table's relation", + Columns: []schema.Column{ + { + Name: "string_col", + Type: arrow.BinaryTypes.String, + }, + }, + }, + { + Name: "relation_relation_table_a", + Description: "Description for relational table's relation", + Columns: []schema.Column{ + { + Name: "string_col", + Type: arrow.BinaryTypes.String, + }, + }, + }, + }, + }, + { + Name: "relation_table2", + Description: "Description for second relational table", + Columns: []schema.Column{ + { + Name: "string_col", + Type: arrow.BinaryTypes.String, + }, + }, + }, + }, + }, + { + Name: "incremental_table", + Description: "Description for incremental table", + IsIncremental: true, + Columns: []schema.Column{ + { + Name: "int_col", + Type: arrow.PrimitiveTypes.Int64, + }, + { + Name: "id_col", + Type: arrow.PrimitiveTypes.Int64, + PrimaryKey: true, + IncrementalKey: true, + }, + { + Name: "id_col2", + Type: arrow.PrimitiveTypes.Int64, + IncrementalKey: true, + }, + }, + }, +} + +func TestGeneratePluginDocs(t *testing.T) { + p := NewPlugin("test", "v1.0.0", testTables, newTestExecutionClient) + + cup := cupaloy.New(cupaloy.SnapshotSubdirectory("testdata")) + + t.Run("Markdown", func(t *testing.T) { + tmpdir := t.TempDir() + + err := p.GeneratePluginDocs(tmpdir, "markdown") + if err != nil { + t.Fatalf("unexpected error calling GeneratePluginDocs: %v", err) + } + + expectFiles := []string{"test_table.md", "relation_table.md", "relation_relation_table_a.md", "relation_relation_table_b.md", "incremental_table.md", "README.md"} + for _, exp := range expectFiles { + t.Run(exp, func(t *testing.T) { + output := path.Join(tmpdir, exp) + got, err := os.ReadFile(output) + require.NoError(t, err) + cup.SnapshotT(t, got) + }) + } + }) + + t.Run("JSON", func(t *testing.T) { + tmpdir := t.TempDir() + + err := p.GeneratePluginDocs(tmpdir, "json") + if err != nil { + t.Fatalf("unexpected error calling GeneratePluginDocs: %v", err) + } + + expectFiles := []string{"__tables.json"} + for _, exp := range expectFiles { + t.Run(exp, func(t *testing.T) { + output := path.Join(tmpdir, exp) + got, err := os.ReadFile(output) + require.NoError(t, err) + cup.SnapshotT(t, got) + }) + } + }) +} diff --git a/plugin/metrics.go b/plugin/metrics.go new file mode 100644 index 0000000000..182bc243a4 --- /dev/null +++ b/plugin/metrics.go @@ -0,0 +1,125 @@ +package plugin + +import ( + "sync/atomic" + "time" + + "github.com/cloudquery/plugin-sdk/v3/schema" +) + +type Metrics struct { + TableClient map[string]map[string]*TableClientMetrics +} + +type TableClientMetrics struct { + Resources uint64 + Errors uint64 + Panics uint64 + StartTime time.Time + EndTime time.Time +} + +func (s *TableClientMetrics) Equal(other *TableClientMetrics) bool { + return s.Resources == other.Resources && s.Errors == other.Errors && s.Panics == other.Panics +} + +// Equal compares to stats. Mostly useful in testing +func (s *Metrics) Equal(other *Metrics) bool { + for table, clientStats := range s.TableClient { + for client, stats := range clientStats { + if _, ok := other.TableClient[table]; !ok { + return false + } + if _, ok := other.TableClient[table][client]; !ok { + return false + } + if !stats.Equal(other.TableClient[table][client]) { + return false + } + } + } + for table, clientStats := range other.TableClient { + for client, stats := range clientStats { + if _, ok := s.TableClient[table]; !ok { + return false + } + if _, ok := s.TableClient[table][client]; !ok { + return false + } + if !stats.Equal(s.TableClient[table][client]) { + return false + } + } + } + return true +} + +func (s *Metrics) initWithClients(table *schema.Table, clients []schema.ClientMeta) { + s.TableClient[table.Name] = make(map[string]*TableClientMetrics, len(clients)) + for _, client := range clients { + s.TableClient[table.Name][client.ID()] = &TableClientMetrics{} + } + for _, relation := range table.Relations { + s.initWithClients(relation, clients) + } +} + +func (s *Metrics) TotalErrors() uint64 { + var total uint64 + for _, clientMetrics := range s.TableClient { + for _, metrics := range clientMetrics { + total += metrics.Errors + } + } + return total +} + +func (s *Metrics) TotalErrorsAtomic() uint64 { + var total uint64 + for _, clientMetrics := range s.TableClient { + for _, metrics := range clientMetrics { + total += atomic.LoadUint64(&metrics.Errors) + } + } + return total +} + +func (s *Metrics) TotalPanics() uint64 { + var total uint64 + for _, clientMetrics := range s.TableClient { + for _, metrics := range clientMetrics { + total += metrics.Panics + } + } + return total +} + +func (s *Metrics) TotalPanicsAtomic() uint64 { + var total uint64 + for _, clientMetrics := range s.TableClient { + for _, metrics := range clientMetrics { + total += atomic.LoadUint64(&metrics.Panics) + } + } + return total +} + +func (s *Metrics) TotalResources() uint64 { + var total uint64 + for _, clientMetrics := range s.TableClient { + for _, metrics := range clientMetrics { + total += metrics.Resources + } + } + return total +} + +func (s *Metrics) TotalResourcesAtomic() uint64 { + var total uint64 + for _, clientMetrics := range s.TableClient { + for _, metrics := range clientMetrics { + total += atomic.LoadUint64(&metrics.Resources) + } + } + return total +} diff --git a/plugin/metrics_test.go b/plugin/metrics_test.go new file mode 100644 index 0000000000..a566edee5d --- /dev/null +++ b/plugin/metrics_test.go @@ -0,0 +1,37 @@ +package plugin + +import "testing" + +func TestMetrics(t *testing.T) { + s := &Metrics{ + TableClient: make(map[string]map[string]*TableClientMetrics), + } + s.TableClient["test_table"] = make(map[string]*TableClientMetrics) + s.TableClient["test_table"]["testExecutionClient"] = &TableClientMetrics{ + Resources: 1, + Errors: 2, + Panics: 3, + } + if s.TotalResources() != 1 { + t.Fatal("expected 1 resource") + } + if s.TotalErrors() != 2 { + t.Fatal("expected 2 error") + } + if s.TotalPanics() != 3 { + t.Fatal("expected 3 panics") + } + + other := &Metrics{ + TableClient: make(map[string]map[string]*TableClientMetrics), + } + other.TableClient["test_table"] = make(map[string]*TableClientMetrics) + other.TableClient["test_table"]["testExecutionClient"] = &TableClientMetrics{ + Resources: 1, + Errors: 2, + Panics: 3, + } + if !s.Equal(other) { + t.Fatal("expected metrics to be equal") + } +} diff --git a/plugin/options.go b/plugin/options.go new file mode 100644 index 0000000000..1290b7cd56 --- /dev/null +++ b/plugin/options.go @@ -0,0 +1,46 @@ +package plugin + +import ( + "context" + + "github.com/cloudquery/plugin-sdk/v3/schema" +) + +type GetTables func(ctx context.Context, c Client) (schema.Tables, error) + +type Option func(*Plugin) + +// WithDynamicTableOption allows the plugin to return list of tables after call to New +func WithDynamicTableOption(getDynamicTables GetTables) Option { + return func(p *Plugin) { + p.getDynamicTables = getDynamicTables + } +} + +// WithNoInternalColumns won't add internal columns (_cq_id, _cq_parent_cq_id) to the plugin tables +func WithNoInternalColumns() Option { + return func(p *Plugin) { + p.internalColumns = false + } +} + +func WithUnmanaged() Option { + return func(p *Plugin) { + p.unmanaged = true + } +} + +// WithTitleTransformer allows the plugin to control how table names get turned into titles for the +// generated documentation. +func WithTitleTransformer(t func(*schema.Table) string) Option { + return func(p *Plugin) { + p.titleTransformer = t + } +} + + +func WithStaticTables(tables schema.Tables) Option { + return func(p *Plugin) { + p.staticTables = tables + } +} \ No newline at end of file diff --git a/plugin/plugin.go b/plugin/plugin.go new file mode 100644 index 0000000000..e1efa19cb1 --- /dev/null +++ b/plugin/plugin.go @@ -0,0 +1,326 @@ +package plugin + +import ( + "context" + "fmt" + "sync" + "time" + + "github.com/apache/arrow/go/v13/arrow" + "github.com/cloudquery/plugin-pb-go/specs" + "github.com/cloudquery/plugin-sdk/v3/backend" + "github.com/cloudquery/plugin-sdk/v3/caser" + "github.com/cloudquery/plugin-sdk/v3/schema" + "github.com/rs/zerolog" + "golang.org/x/sync/semaphore" + + pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v0" +) + +type Options struct { + Backend backend.Backend +} + +type NewExecutionClientFunc func(context.Context, zerolog.Logger, specs.Source, Options) (schema.ClientMeta, error) + +type NewClientFunc func(context.Context, zerolog.Logger, pbPlugin.Spec) (Client, error) + +type UnmanagedClient interface { + schema.ClientMeta + Sync(ctx context.Context, metrics *Metrics, syncSpec pbPlugin.SyncSpec, res chan<- *schema.Resource) error +} + +type Client interface { + Sync(ctx context.Context, metrics *Metrics, res chan<- *schema.Resource) error + Migrate(ctx context.Context, tables schema.Tables) error + Write(ctx context.Context, tables schema.Tables, res <-chan arrow.Record) error + DeleteStale(ctx context.Context, tables schema.Tables, sourceName string, syncTime time.Time) error + Read(ctx context.Context, table *schema.Table, sourceName string, res chan<- arrow.Record) error +} + +type UnimplementedWriter struct{} + +func (UnimplementedWriter) WriteTableBatch(context.Context, *schema.Table, []arrow.Record) error { + return fmt.Errorf("not implemented") +} + +type UnimplementedSync struct{} + +func (UnimplementedSync) Sync(ctx context.Context, metrics *Metrics, res chan<- *schema.Resource) error { + return fmt.Errorf("not implemented") +} + +// Plugin is the base structure required to pass to sdk.serve +// We take a declarative approach to API here similar to Cobra +type Plugin struct { + // Name of plugin i.e aws,gcp, azure etc' + name string + // Version of the plugin + version string + // Called upon init call to validate and init configuration + newClient NewClientFunc + // dynamic table function if specified + getDynamicTables GetTables + // Tables are static tables that defined in compile time by the plugin + staticTables schema.Tables + // status sync metrics + metrics *Metrics + // Logger to call, this logger is passed to the serve.Serve Client, if not defined Serve will create one instead. + logger zerolog.Logger + // resourceSem is a semaphore that limits the number of concurrent resources being fetched + resourceSem *semaphore.Weighted + // tableSem is a semaphore that limits the number of concurrent tables being fetched + tableSems []*semaphore.Weighted + // maxDepth is the max depth of tables + maxDepth uint64 + // caser + caser *caser.Caser + // mu is a mutex that limits the number of concurrent init/syncs (can only be one at a time) + mu sync.Mutex + + // client is the initialized session client + client Client + // sessionTables are the + sessionTables schema.Tables + // backend is the backend used to store the cursor state + backend backend.Backend + // spec is the spec the client was initialized with + spec pbPlugin.Spec + // NoInternalColumns if set to true will not add internal columns to tables such as _cq_id and _cq_parent_id + // useful for sources such as PostgreSQL and other databases + internalColumns bool + // unmanaged if set to true then the plugin will call Sync directly and not use the scheduler + unmanaged bool + // titleTransformer allows the plugin to control how table names get turned into titles for generated documentation + titleTransformer func(*schema.Table) string + syncTime time.Time +} + +const ( + maxAllowedDepth = 4 +) + +// Add internal columns +func (p *Plugin) addInternalColumns(tables []*schema.Table) error { + for _, table := range tables { + if c := table.Column("_cq_id"); c != nil { + return fmt.Errorf("table %s already has column _cq_id", table.Name) + } + cqID := schema.CqIDColumn + if len(table.PrimaryKeys()) == 0 { + cqID.PrimaryKey = true + } + cqSourceName := schema.CqSourceNameColumn + cqSyncTime := schema.CqSyncTimeColumn + cqSourceName.Resolver = func(_ context.Context, _ schema.ClientMeta, resource *schema.Resource, c schema.Column) error { + return resource.Set(c.Name, p.spec.Name) + } + cqSyncTime.Resolver = func(_ context.Context, _ schema.ClientMeta, resource *schema.Resource, c schema.Column) error { + return resource.Set(c.Name, p.syncTime) + } + + table.Columns = append([]schema.Column{cqSourceName, cqSyncTime, cqID, schema.CqParentIDColumn}, table.Columns...) + if err := p.addInternalColumns(table.Relations); err != nil { + return err + } + } + return nil +} + +// Set parent links on relational tables +func setParents(tables schema.Tables, parent *schema.Table) { + for _, table := range tables { + table.Parent = parent + setParents(table.Relations, table) + } +} + +// Apply transformations to tables +func transformTables(tables schema.Tables) error { + for _, table := range tables { + if table.Transform != nil { + if err := table.Transform(table); err != nil { + return fmt.Errorf("failed to transform table %s: %w", table.Name, err) + } + } + if err := transformTables(table.Relations); err != nil { + return err + } + } + return nil +} + +func maxDepth(tables schema.Tables) uint64 { + var depth uint64 + if len(tables) == 0 { + return 0 + } + for _, table := range tables { + newDepth := 1 + maxDepth(table.Relations) + if newDepth > depth { + depth = newDepth + } + } + return depth +} + +func NewPlugin(name string, version string, newClient NewClientFunc, options ...Option) *Plugin { + p := Plugin{ + name: name, + version: version, + internalColumns: true, + caser: caser.New(), + titleTransformer: DefaultTitleTransformer, + newClient: newClient, + } + for _, opt := range options { + opt(&p) + } + if p.staticTables != nil { + if p.internalColumns { + if err := p.addInternalColumns(p.staticTables); err != nil { + panic(err) + } + } + p.maxDepth = maxDepth(p.staticTables) + if p.maxDepth > maxAllowedDepth { + panic(fmt.Errorf("max depth of tables is %d, max allowed is %d", p.maxDepth, maxAllowedDepth)) + } + if err := p.validate(p.staticTables); err != nil { + panic(err) + } + } + + return &p +} + +// Name return the name of this plugin +func (p *Plugin) Name() string { + return p.name +} + +// Version returns the version of this plugin +func (p *Plugin) Version() string { + return p.version +} + + +func (p *Plugin) SetLogger(logger zerolog.Logger) { + p.logger = logger.With().Str("module", p.name+"-src").Logger() +} + +// Tables returns all tables supported by this source plugin +func (p *Plugin) StaticTables() schema.Tables { + return p.staticTables +} + +func (p *Plugin) HasDynamicTables() bool { + return p.getDynamicTables != nil +} + +func (p *Plugin) DynamicTables() schema.Tables { + return p.sessionTables +} + +func (p *Plugin) Read(ctx context.Context, table *schema.Table, sourceName string, res chan<- arrow.Record) error { + return p.client.Read(ctx, table, sourceName, res) +} + +func (p *Plugin) Metrics() *Metrics { + return p.metrics +} + +func (p *Plugin) Init(ctx context.Context, spec pbPlugin.Spec) error { + if !p.mu.TryLock() { + return fmt.Errorf("plugin already in use") + } + defer p.mu.Unlock() + + var err error + p.client, err = p.newClient(ctx, p.logger, spec) + if err != nil { + return fmt.Errorf("failed to initialize client: %w", err) + } + p.spec = spec + + return nil +} + +func (p *Plugin) Migrate(ctx context.Context, tables schema.Tables) error { + return p.client.Migrate(ctx, tables) +} + +func (p *Plugin) writeUnmanaged(ctx context.Context, _ specs.Source, tables schema.Tables, _ time.Time, res <-chan arrow.Record) error { + return p.client.Write(ctx, tables, res) +} + +func (p *Plugin) Write(ctx context.Context, sourceSpec pbPlugin.Spec, tables schema.Tables, syncTime time.Time, res <-chan arrow.Record) error { + syncTime = syncTime.UTC() + if err := p.client.Write(ctx, tables, res); err != nil { + return err + } + if p.spec.WriteSpec.WriteMode == pbPlugin.WRITE_MODE_WRITE_MODE_OVERWRITE_DELETE_STALE { + tablesToDelete := tables + if sourceSpec.BackendSpec != nil { + tablesToDelete = make(schema.Tables, 0, len(tables)) + for _, t := range tables { + if !t.IsIncremental { + tablesToDelete = append(tablesToDelete, t) + } + } + } + if err := p.DeleteStale(ctx, tablesToDelete, sourceSpec.Name, syncTime); err != nil { + return err + } + } + return nil +} + +func (p *Plugin) DeleteStale(ctx context.Context, tables schema.Tables, sourceName string, syncTime time.Time) error { + syncTime = syncTime.UTC() + return p.client.DeleteStale(ctx, tables, sourceName, syncTime) +} + +// Sync is syncing data from the requested tables in spec to the given channel +func (p *Plugin) Sync(ctx context.Context, syncTime time.Time, syncSpec pbPlugin.SyncSpec, res chan<- *schema.Resource) error { + if !p.mu.TryLock() { + return fmt.Errorf("plugin already in use") + } + defer p.mu.Unlock() + p.syncTime = syncTime + + startTime := time.Now() + if p.unmanaged { + unmanagedClient := p.client.(UnmanagedClient) + if err := unmanagedClient.Sync(ctx, p.metrics, syncSpec, res); err != nil { + return fmt.Errorf("failed to sync unmanaged client: %w", err) + } + } else { + switch syncSpec.Scheduler { + case pbPlugin.SyncSpec_SCHEDULER_DFS: + p.syncDfs(ctx, syncSpec, p.client, p.sessionTables, res) + case pbPlugin.SyncSpec_SCHEDULER_ROUND_ROBIN: + p.syncRoundRobin(ctx, syncSpec, p.client, p.sessionTables, res) + default: + return fmt.Errorf("unknown scheduler %s. Options are: %v", syncSpec.Scheduler, specs.AllSchedulers.String()) + } + } + + p.logger.Info().Uint64("resources", p.metrics.TotalResources()).Uint64("errors", p.metrics.TotalErrors()).Uint64("panics", p.metrics.TotalPanics()).TimeDiff("duration", time.Now(), startTime).Msg("sync finished") + return nil +} + +func (p *Plugin) Close(ctx context.Context) error { + if !p.mu.TryLock() { + return fmt.Errorf("plugin already in use") + } + defer p.mu.Unlock() + if p.backend != nil { + err := p.backend.Close(ctx) + if err != nil { + return fmt.Errorf("failed to close backend: %w", err) + } + p.backend = nil + } + return nil +} diff --git a/plugin/plugin_round_robin_test.go b/plugin/plugin_round_robin_test.go new file mode 100644 index 0000000000..9c4c094d6f --- /dev/null +++ b/plugin/plugin_round_robin_test.go @@ -0,0 +1,148 @@ +package plugin + +import ( + "context" + "fmt" + "sync" + "testing" + "time" + + "github.com/apache/arrow/go/v13/arrow" + "github.com/apache/arrow/go/v13/arrow/array" + pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v0" + "github.com/cloudquery/plugin-pb-go/specs" + "github.com/cloudquery/plugin-sdk/v3/schema" + "github.com/rs/zerolog" +) + +type testPluginClient struct { + memoryDB map[string][]arrow.Record + tables map[string]*schema.Table + memoryDBLock sync.RWMutex +} + +type testPluginSpec struct { + ConnectionString string `json:"connection_string"` +} + +func (c *testPluginClient) Sync(ctx context.Context, metrics *Metrics, res chan<- *schema.Resource) error { + return nil +} + +func (c *testPluginClient) Migrate(ctx context.Context, tables schema.Tables) error { + for _, table := range tables { + tableName := table.Name + memTable := c.memoryDB[tableName] + if memTable == nil { + c.memoryDB[tableName] = make([]arrow.Record, 0) + c.tables[tableName] = table + continue + } + + changes := table.GetChanges(c.tables[tableName]) + // memdb doesn't support any auto-migrate + if changes == nil { + continue + } + c.memoryDB[tableName] = make([]arrow.Record, 0) + c.tables[tableName] = table + } + return nil + return nil +} + +func (c *testPluginClient) Write(ctx context.Context, tables schema.Tables, resources <-chan arrow.Record) error { + for resource := range resources { + c.memoryDBLock.Lock() + sc := resource.Schema() + tableName, ok := sc.Metadata().GetValue(schema.MetadataTableName) + if !ok { + return fmt.Errorf("table name not found in schema metadata") + } + table := c.tables[tableName] + if c.spec.WriteMode == specs.WriteModeAppend { + c.memoryDB[tableName] = append(c.memoryDB[tableName], resource) + } else { + c.overwrite(table, resource) + } + c.memoryDBLock.Unlock() + } + return nil +} + +func (c *testPluginClient) overwrite(table *schema.Table, data arrow.Record) { + pksIndex := table.PrimaryKeysIndexes() + tableName := table.Name + for i, row := range c.memoryDB[tableName] { + found := true + for _, pkIndex := range pksIndex { + s1 := data.Column(pkIndex).String() + s2 := row.Column(pkIndex).String() + if s1 != s2 { + found = false + } + } + if found { + c.memoryDB[tableName] = append(c.memoryDB[tableName][:i], c.memoryDB[tableName][i+1:]...) + c.memoryDB[tableName] = append(c.memoryDB[tableName], data) + return + } + } + c.memoryDB[tableName] = append(c.memoryDB[tableName], data) +} + +func (c *testPluginClient) deleteStaleTable(_ context.Context, table *schema.Table, source string, syncTime time.Time) { + sourceColIndex := table.Columns.Index(schema.CqSourceNameColumn.Name) + syncColIndex := table.Columns.Index(schema.CqSyncTimeColumn.Name) + tableName := table.Name + var filteredTable []arrow.Record + for i, row := range c.memoryDB[tableName] { + if row.Column(sourceColIndex).(*array.String).Value(0) == source { + rowSyncTime := row.Column(syncColIndex).(*array.Timestamp).Value(0).ToTime(arrow.Microsecond).UTC() + if !rowSyncTime.Before(syncTime) { + filteredTable = append(filteredTable, c.memoryDB[tableName][i]) + } + } + } + c.memoryDB[tableName] = filteredTable +} + +func (c *testPluginClient) DeleteStale(ctx context.Context, tables schema.Tables, sourceName string, syncTime time.Time) error { + return nil +} + +func (c *testPluginClient) Read(ctx context.Context, table *schema.Table, sourceName string, res chan<- arrow.Record) error { + tableName := table.Name + if c.memoryDB[tableName] == nil { + return nil + } + sourceColIndex := table.Columns.Index(schema.CqSourceNameColumn.Name) + if sourceColIndex == -1 { + return fmt.Errorf("table %s doesn't have source column", tableName) + } + var sortedRes []arrow.Record + c.memoryDBLock.RLock() + for _, row := range c.memoryDB[tableName] { + arr := row.Column(sourceColIndex) + if arr.(*array.String).Value(0) == sourceName { + sortedRes = append(sortedRes, row) + } + } + c.memoryDBLock.RUnlock() + + for _, row := range sortedRes { + res <- row + } + return nil +} + +func NewTestPluginClient(context.Context, zerolog.Logger, pbPlugin.Spec) (Client, error) { + return &testPluginClient{ + memoryDB: make(map[string][]arrow.Record), + tables: make(map[string]*schema.Table), + }, nil +} + +func TestPluginRoundRobin(t *testing.T) { + p := NewPlugin("test", "v0.0.0", NewTestPluginClient) +} \ No newline at end of file diff --git a/plugin/plugin_test.go b/plugin/plugin_test.go new file mode 100644 index 0000000000..16afc7338c --- /dev/null +++ b/plugin/plugin_test.go @@ -0,0 +1,470 @@ +package plugin + +import ( + "context" + "testing" + "time" + + "github.com/apache/arrow/go/v13/arrow" + "github.com/cloudquery/plugin-pb-go/specs" + "github.com/cloudquery/plugin-sdk/v3/scalar" + "github.com/cloudquery/plugin-sdk/v3/schema" + "github.com/cloudquery/plugin-sdk/v3/transformers" + "github.com/google/go-cmp/cmp" + "github.com/google/uuid" + "github.com/rs/zerolog" + "github.com/stretchr/testify/assert" + "golang.org/x/sync/errgroup" +) + +type testExecutionClient struct{} + +var _ schema.ClientMeta = &testExecutionClient{} + +var deterministicStableUUID = uuid.MustParse("c25355aab52c5b70a4e0c9991f5a3b87") +var randomStableUUID = uuid.MustParse("00000000000040008000000000000000") + +var testSyncTime = time.Now() + +func testResolverSuccess(_ context.Context, _ schema.ClientMeta, _ *schema.Resource, res chan<- any) error { + res <- map[string]any{ + "TestColumn": 3, + } + return nil +} + +func testResolverPanic(context.Context, schema.ClientMeta, *schema.Resource, chan<- any) error { + panic("Resolver") +} + +func testPreResourceResolverPanic(context.Context, schema.ClientMeta, *schema.Resource) error { + panic("PreResourceResolver") +} + +func testColumnResolverPanic(context.Context, schema.ClientMeta, *schema.Resource, schema.Column) error { + panic("ColumnResolver") +} + +func testTableSuccess() *schema.Table { + return &schema.Table{ + Name: "test_table_success", + Resolver: testResolverSuccess, + Columns: []schema.Column{ + { + Name: "test_column", + Type: arrow.PrimitiveTypes.Int64, + }, + }, + } +} + +func testTableSuccessWithPK() *schema.Table { + return &schema.Table{ + Name: "test_table_success", + Resolver: testResolverSuccess, + Columns: []schema.Column{ + { + Name: "test_column", + Type: arrow.PrimitiveTypes.Int64, + PrimaryKey: true, + }, + }, + } +} + +func testTableResolverPanic() *schema.Table { + return &schema.Table{ + Name: "test_table_resolver_panic", + Resolver: testResolverPanic, + Columns: []schema.Column{ + { + Name: "test_column", + Type: arrow.PrimitiveTypes.Int64, + }, + }, + } +} + +func testTablePreResourceResolverPanic() *schema.Table { + return &schema.Table{ + Name: "test_table_pre_resource_resolver_panic", + PreResourceResolver: testPreResourceResolverPanic, + Resolver: testResolverSuccess, + Columns: []schema.Column{ + { + Name: "test_column", + Type: arrow.PrimitiveTypes.Int64, + }, + }, + } +} + +func testTableColumnResolverPanic() *schema.Table { + return &schema.Table{ + Name: "test_table_column_resolver_panic", + Resolver: testResolverSuccess, + Columns: []schema.Column{ + { + Name: "test_column", + Type: arrow.PrimitiveTypes.Int64, + }, + { + Name: "test_column1", + Type: arrow.PrimitiveTypes.Int64, + Resolver: testColumnResolverPanic, + }, + }, + } +} + +func testTableRelationSuccess() *schema.Table { + return &schema.Table{ + Name: "test_table_relation_success", + Resolver: testResolverSuccess, + Columns: []schema.Column{ + { + Name: "test_column", + Type: arrow.PrimitiveTypes.Int64, + }, + }, + Relations: []*schema.Table{ + testTableSuccess(), + }, + } +} + +func (*testExecutionClient) ID() string { + return "testExecutionClient" +} + +func newTestExecutionClient(context.Context, zerolog.Logger, specs.Source, Options) (schema.ClientMeta, error) { + return &testExecutionClient{}, nil +} + +type syncTestCase struct { + table *schema.Table + stats Metrics + data []scalar.Vector + deterministicCQID bool +} + +var syncTestCases = []syncTestCase{ + { + table: testTableSuccess(), + stats: Metrics{ + TableClient: map[string]map[string]*TableClientMetrics{ + "test_table_success": { + "testExecutionClient": { + Resources: 1, + }, + }, + }, + }, + data: []scalar.Vector{ + { + &scalar.String{Value: "testSource", Valid: true}, + &scalar.Timestamp{Value: testSyncTime, Valid: true}, + &scalar.UUID{Value: randomStableUUID, Valid: true}, + &scalar.UUID{}, + &scalar.Int64{Value: 3, Valid: true}, + }, + }, + }, + { + table: testTableResolverPanic(), + stats: Metrics{ + TableClient: map[string]map[string]*TableClientMetrics{ + "test_table_resolver_panic": { + "testExecutionClient": { + Panics: 1, + }, + }, + }, + }, + data: nil, + }, + { + table: testTablePreResourceResolverPanic(), + stats: Metrics{ + TableClient: map[string]map[string]*TableClientMetrics{ + "test_table_pre_resource_resolver_panic": { + "testExecutionClient": { + Panics: 1, + }, + }, + }, + }, + data: nil, + }, + + { + table: testTableRelationSuccess(), + stats: Metrics{ + TableClient: map[string]map[string]*TableClientMetrics{ + "test_table_relation_success": { + "testExecutionClient": { + Resources: 1, + }, + }, + "test_table_success": { + "testExecutionClient": { + Resources: 1, + }, + }, + }, + }, + data: []scalar.Vector{ + { + &scalar.String{Value: "testSource", Valid: true}, + &scalar.Timestamp{Value: testSyncTime, Valid: true}, + &scalar.UUID{Value: randomStableUUID, Valid: true}, + &scalar.UUID{}, + &scalar.Int64{Value: 3, Valid: true}, + }, + { + &scalar.String{Value: "testSource", Valid: true}, + &scalar.Timestamp{Value: testSyncTime, Valid: true}, + &scalar.UUID{Value: randomStableUUID, Valid: true}, + &scalar.UUID{Value: randomStableUUID, Valid: true}, + &scalar.Int64{Value: 3, Valid: true}, + }, + }, + }, + { + table: testTableSuccess(), + stats: Metrics{ + TableClient: map[string]map[string]*TableClientMetrics{ + "test_table_success": { + "testExecutionClient": { + Resources: 1, + }, + }, + }, + }, + data: []scalar.Vector{ + { + &scalar.String{Value: "testSource", Valid: true}, + &scalar.Timestamp{Value: testSyncTime, Valid: true}, + &scalar.UUID{Value: randomStableUUID, Valid: true}, + &scalar.UUID{}, + &scalar.Int64{Value: 3, Valid: true}, + }, + }, + deterministicCQID: true, + }, + { + table: testTableColumnResolverPanic(), + stats: Metrics{ + TableClient: map[string]map[string]*TableClientMetrics{ + "test_table_column_resolver_panic": { + "testExecutionClient": { + Panics: 1, + Resources: 1, + }, + }, + }, + }, + data: []scalar.Vector{ + { + &scalar.String{Value: "testSource", Valid: true}, + &scalar.Timestamp{Value: testSyncTime, Valid: true}, + &scalar.UUID{Value: randomStableUUID, Valid: true}, + &scalar.UUID{}, + &scalar.Int64{Value: 3, Valid: true}, + &scalar.Int64{}, + }, + }, + deterministicCQID: true, + }, + { + table: testTableRelationSuccess(), + stats: Metrics{ + TableClient: map[string]map[string]*TableClientMetrics{ + "test_table_relation_success": { + "testExecutionClient": { + Resources: 1, + }, + }, + "test_table_success": { + "testExecutionClient": { + Resources: 1, + }, + }, + }, + }, + data: []scalar.Vector{ + { + &scalar.String{Value: "testSource", Valid: true}, + &scalar.Timestamp{Value: testSyncTime, Valid: true}, + &scalar.UUID{Value: randomStableUUID, Valid: true}, + &scalar.UUID{}, + &scalar.Int64{Value: 3, Valid: true}, + }, + { + &scalar.String{Value: "testSource", Valid: true}, + &scalar.Timestamp{Value: testSyncTime, Valid: true}, + &scalar.UUID{Value: randomStableUUID, Valid: true}, + &scalar.UUID{Value: randomStableUUID, Valid: true}, + &scalar.Int64{Value: 3, Valid: true}, + }, + }, + deterministicCQID: true, + }, + { + table: testTableSuccessWithPK(), + stats: Metrics{ + TableClient: map[string]map[string]*TableClientMetrics{ + "test_table_success": { + "testExecutionClient": { + Resources: 1, + }, + }, + }, + }, + data: []scalar.Vector{ + { + &scalar.String{Value: "testSource", Valid: true}, + &scalar.Timestamp{Value: testSyncTime, Valid: true}, + &scalar.UUID{Value: deterministicStableUUID, Valid: true}, + &scalar.UUID{}, + &scalar.Int64{Value: 3, Valid: true}, + }, + }, + deterministicCQID: true, + }, +} + +type testRand struct{} + +func (testRand) Read(p []byte) (n int, err error) { + for i := range p { + p[i] = byte(0) + } + return len(p), nil +} + +func TestSync(t *testing.T) { + uuid.SetRand(testRand{}) + for _, scheduler := range specs.AllSchedulers { + for _, tc := range syncTestCases { + tc := tc + tc.table = tc.table.Copy(nil) + t.Run(tc.table.Name+"_"+scheduler.String(), func(t *testing.T) { + testSyncTable(t, tc, scheduler, tc.deterministicCQID) + }) + } + } +} + +func testSyncTable(t *testing.T, tc syncTestCase, scheduler specs.Scheduler, deterministicCQID bool) { + ctx := context.Background() + tables := []*schema.Table{ + tc.table, + } + + plugin := NewPlugin( + "testSourcePlugin", + "1.0.0", + tables, + newTestExecutionClient, + ) + plugin.SetLogger(zerolog.New(zerolog.NewTestWriter(t))) + spec := specs.Source{ + Name: "testSource", + Path: "cloudquery/testSource", + Tables: []string{"*"}, + Version: "v1.0.0", + Destinations: []string{"test"}, + Concurrency: 1, // choose a very low value to check that we don't run into deadlocks + Scheduler: scheduler, + DeterministicCQID: deterministicCQID, + } + if err := plugin.Init(ctx, spec); err != nil { + t.Fatal(err) + } + + resources := make(chan *schema.Resource) + g, ctx := errgroup.WithContext(ctx) + g.Go(func() error { + defer close(resources) + return plugin.Sync(ctx, + testSyncTime, + resources) + }) + + var i int + for resource := range resources { + if tc.data == nil { + t.Fatalf("Unexpected resource %v", resource) + } + if i >= len(tc.data) { + t.Fatalf("expected %d resources. got %d", len(tc.data), i) + } + if !resource.GetValues().Equal(tc.data[i]) { + t.Fatalf("expected at i=%d: %v. got %v", i, tc.data[i], resource.GetValues()) + } + i++ + } + if len(tc.data) != i { + t.Fatalf("expected %d resources. got %d", len(tc.data), i) + } + + stats := plugin.Metrics() + if !tc.stats.Equal(stats) { + t.Fatalf("unexpected stats: %v", cmp.Diff(tc.stats, stats)) + } + if err := g.Wait(); err != nil { + t.Fatal(err) + } +} + +func TestIgnoredColumns(t *testing.T) { + validateResources(t, schema.Resources{{ + Item: struct{ A *string }{}, + Table: &schema.Table{ + Columns: schema.ColumnList{ + { + Name: "a", + Type: arrow.BinaryTypes.String, + IgnoreInTests: true, + }, + }, + }, + }}) +} + +var testTable struct { + PrimaryKey string + SecondaryKey string + TertiaryKey string + Quaternary string +} + +func TestNewPluginPrimaryKeys(t *testing.T) { + testTransforms := []struct { + transformerOptions []transformers.StructTransformerOption + resultKeys []string + }{ + { + transformerOptions: []transformers.StructTransformerOption{transformers.WithPrimaryKeys("PrimaryKey")}, + resultKeys: []string{"primary_key"}, + }, + { + transformerOptions: []transformers.StructTransformerOption{}, + resultKeys: []string{"_cq_id"}, + }, + } + for _, tc := range testTransforms { + tables := []*schema.Table{ + { + Name: "test_table", + Transform: transformers.TransformWithStruct( + &testTable, tc.transformerOptions..., + ), + }, + } + + plugin := NewPlugin("testSourcePlugin", "1.0.0", tables, newTestExecutionClient) + assert.Equal(t, tc.resultKeys, plugin.tables[0].PrimaryKeys()) + } +} diff --git a/plugin/scheduler.go b/plugin/scheduler.go new file mode 100644 index 0000000000..373147d194 --- /dev/null +++ b/plugin/scheduler.go @@ -0,0 +1,163 @@ +package plugin + +import ( + "context" + "errors" + "fmt" + "runtime/debug" + "sync" + "sync/atomic" + "time" + + "github.com/cloudquery/plugin-sdk/v3/schema" + "github.com/getsentry/sentry-go" + "github.com/rs/zerolog" + "github.com/thoas/go-funk" +) + +const ( + minTableConcurrency = 1 + minResourceConcurrency = 100 +) + +const periodicMetricLoggerInterval = 30 * time.Second + +func (p *Plugin) logTablesMetrics(tables schema.Tables, client schema.ClientMeta) { + clientName := client.ID() + for _, table := range tables { + metrics := p.metrics.TableClient[table.Name][clientName] + p.logger.Info().Str("table", table.Name).Str("client", clientName).Uint64("resources", metrics.Resources).Uint64("errors", metrics.Errors).Msg("table sync finished") + p.logTablesMetrics(table.Relations, client) + } +} + +func (p *Plugin) resolveResource(ctx context.Context, table *schema.Table, client schema.ClientMeta, parent *schema.Resource, item any) *schema.Resource { + var validationErr *schema.ValidationError + ctx, cancel := context.WithTimeout(ctx, 10*time.Minute) + defer cancel() + resource := schema.NewResourceData(table, parent, item) + objectStartTime := time.Now() + clientID := client.ID() + tableMetrics := p.metrics.TableClient[table.Name][clientID] + logger := p.logger.With().Str("table", table.Name).Str("client", clientID).Logger() + defer func() { + if err := recover(); err != nil { + stack := fmt.Sprintf("%s\n%s", err, string(debug.Stack())) + logger.Error().Interface("error", err).TimeDiff("duration", time.Now(), objectStartTime).Str("stack", stack).Msg("resource resolver finished with panic") + atomic.AddUint64(&tableMetrics.Panics, 1) + sentry.WithScope(func(scope *sentry.Scope) { + scope.SetTag("table", table.Name) + sentry.CurrentHub().CaptureMessage(stack) + }) + } + }() + if table.PreResourceResolver != nil { + if err := table.PreResourceResolver(ctx, client, resource); err != nil { + logger.Error().Err(err).Msg("pre resource resolver failed") + atomic.AddUint64(&tableMetrics.Errors, 1) + if errors.As(err, &validationErr) { + sentry.WithScope(func(scope *sentry.Scope) { + scope.SetTag("table", table.Name) + sentry.CurrentHub().CaptureMessage(validationErr.MaskedError()) + }) + } + return nil + } + } + + for _, c := range table.Columns { + p.resolveColumn(ctx, logger, tableMetrics, client, resource, c) + } + + if table.PostResourceResolver != nil { + if err := table.PostResourceResolver(ctx, client, resource); err != nil { + logger.Error().Stack().Err(err).Msg("post resource resolver finished with error") + atomic.AddUint64(&tableMetrics.Errors, 1) + if errors.As(err, &validationErr) { + sentry.WithScope(func(scope *sentry.Scope) { + scope.SetTag("table", table.Name) + sentry.CurrentHub().CaptureMessage(validationErr.MaskedError()) + }) + } + } + } + atomic.AddUint64(&tableMetrics.Resources, 1) + return resource +} + +func (p *Plugin) resolveColumn(ctx context.Context, logger zerolog.Logger, tableMetrics *TableClientMetrics, client schema.ClientMeta, resource *schema.Resource, c schema.Column) { + var validationErr *schema.ValidationError + columnStartTime := time.Now() + defer func() { + if err := recover(); err != nil { + stack := fmt.Sprintf("%s\n%s", err, string(debug.Stack())) + logger.Error().Str("column", c.Name).Interface("error", err).TimeDiff("duration", time.Now(), columnStartTime).Str("stack", stack).Msg("column resolver finished with panic") + atomic.AddUint64(&tableMetrics.Panics, 1) + sentry.WithScope(func(scope *sentry.Scope) { + scope.SetTag("table", resource.Table.Name) + scope.SetTag("column", c.Name) + sentry.CurrentHub().CaptureMessage(stack) + }) + } + }() + + if c.Resolver != nil { + if err := c.Resolver(ctx, client, resource, c); err != nil { + logger.Error().Err(err).Msg("column resolver finished with error") + atomic.AddUint64(&tableMetrics.Errors, 1) + if errors.As(err, &validationErr) { + sentry.WithScope(func(scope *sentry.Scope) { + scope.SetTag("table", resource.Table.Name) + scope.SetTag("column", c.Name) + sentry.CurrentHub().CaptureMessage(validationErr.MaskedError()) + }) + } + } + } else { + // base use case: try to get column with CamelCase name + v := funk.Get(resource.GetItem(), p.caser.ToPascal(c.Name), funk.WithAllowZero()) + if v != nil { + err := resource.Set(c.Name, v) + if err != nil { + logger.Error().Err(err).Msg("column resolver finished with error") + atomic.AddUint64(&tableMetrics.Errors, 1) + if errors.As(err, &validationErr) { + sentry.WithScope(func(scope *sentry.Scope) { + scope.SetTag("table", resource.Table.Name) + scope.SetTag("column", c.Name) + sentry.CurrentHub().CaptureMessage(validationErr.MaskedError()) + }) + } + } + } + } +} + +func (p *Plugin) periodicMetricLogger(ctx context.Context, wg *sync.WaitGroup) { + defer wg.Done() + + ticker := time.NewTicker(periodicMetricLoggerInterval) + defer ticker.Stop() + + for { + select { + case <-ctx.Done(): + return + case <-ticker.C: + p.logger.Info(). + Uint64("total_resources", p.metrics.TotalResourcesAtomic()). + Uint64("total_errors", p.metrics.TotalErrorsAtomic()). + Uint64("total_panics", p.metrics.TotalPanicsAtomic()). + Msg("Sync in progress") + } + } +} + +// unparam's suggestion to remove the second parameter is not good advice here. +// nolint:unparam +func max(a, b uint64) uint64 { + if a > b { + return a + } + return b +} diff --git a/plugin/scheduler_dfs.go b/plugin/scheduler_dfs.go new file mode 100644 index 0000000000..9390966395 --- /dev/null +++ b/plugin/scheduler_dfs.go @@ -0,0 +1,230 @@ +package plugin + +import ( + "context" + "errors" + "fmt" + "runtime/debug" + "sync" + "sync/atomic" + + pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v0" + "github.com/cloudquery/plugin-sdk/v3/helpers" + "github.com/cloudquery/plugin-sdk/v3/schema" + "github.com/getsentry/sentry-go" + "golang.org/x/sync/semaphore" +) + +func (p *Plugin) syncDfs(ctx context.Context, spec pbPlugin.SyncSpec, client Client, tables schema.Tables, resolvedResources chan<- *schema.Resource) { + // This is very similar to the concurrent web crawler problem with some minor changes. + // We are using DFS to make sure memory usage is capped at O(h) where h is the height of the tree. + tableConcurrency := max(spec.Concurrency/minResourceConcurrency, minTableConcurrency) + resourceConcurrency := tableConcurrency * minResourceConcurrency + + p.tableSems = make([]*semaphore.Weighted, p.maxDepth) + for i := uint64(0); i < p.maxDepth; i++ { + p.tableSems[i] = semaphore.NewWeighted(int64(tableConcurrency)) + // reduce table concurrency logarithmically for every depth level + tableConcurrency = max(tableConcurrency/2, minTableConcurrency) + } + p.resourceSem = semaphore.NewWeighted(int64(resourceConcurrency)) + + // we have this because plugins can return sometimes clients in a random way which will cause + // differences between this run and the next one. + preInitialisedClients := make([][]schema.ClientMeta, len(tables)) + for i, table := range tables { + clients := []schema.ClientMeta{client.(schema.ClientMeta)} + if table.Multiplex != nil { + clients = table.Multiplex(client.(schema.ClientMeta)) + } + // Detect duplicate clients while multiplexing + seenClients := make(map[string]bool) + for _, c := range clients { + if _, ok := seenClients[c.ID()]; !ok { + seenClients[c.ID()] = true + } else { + sentry.WithScope(func(scope *sentry.Scope) { + scope.SetTag("table", table.Name) + sentry.CurrentHub().CaptureMessage("duplicate client ID in " + table.Name) + }) + p.logger.Warn().Str("client", c.ID()).Str("table", table.Name).Msg("multiplex returned duplicate client") + } + } + preInitialisedClients[i] = clients + // we do this here to avoid locks so we initial the metrics structure once in the main goroutines + // and then we can just read from it in the other goroutines concurrently given we are not writing to it. + p.metrics.initWithClients(table, clients) + } + + // We start a goroutine that logs the metrics periodically. + // It needs its own waitgroup + var logWg sync.WaitGroup + logWg.Add(1) + + logCtx, logCancel := context.WithCancel(ctx) + go p.periodicMetricLogger(logCtx, &logWg) + + var wg sync.WaitGroup + for i, table := range tables { + table := table + clients := preInitialisedClients[i] + for _, client := range clients { + client := client + if err := p.tableSems[0].Acquire(ctx, 1); err != nil { + // This means context was cancelled + wg.Wait() + // gracefully shut down the logger goroutine + logCancel() + logWg.Wait() + return + } + wg.Add(1) + go func() { + defer wg.Done() + defer p.tableSems[0].Release(1) + // not checking for error here as nothing much todo. + // the error is logged and this happens when context is cancelled + p.resolveTableDfs(ctx, table, client, nil, resolvedResources, 1) + }() + } + } + + // Wait for all the worker goroutines to finish + wg.Wait() + + // gracefully shut down the logger goroutine + logCancel() + logWg.Wait() +} + +func (p *Plugin) resolveTableDfs(ctx context.Context, table *schema.Table, client schema.ClientMeta, parent *schema.Resource, resolvedResources chan<- *schema.Resource, depth int) { + var validationErr *schema.ValidationError + clientName := client.ID() + logger := p.logger.With().Str("table", table.Name).Str("client", clientName).Logger() + + if parent == nil { // Log only for root tables, otherwise we spam too much. + logger.Info().Msg("top level table resolver started") + } + tableMetrics := p.metrics.TableClient[table.Name][clientName] + + res := make(chan any) + go func() { + defer func() { + if err := recover(); err != nil { + stack := fmt.Sprintf("%s\n%s", err, string(debug.Stack())) + sentry.WithScope(func(scope *sentry.Scope) { + scope.SetTag("table", table.Name) + sentry.CurrentHub().CaptureMessage(stack) + }) + logger.Error().Interface("error", err).Str("stack", stack).Msg("table resolver finished with panic") + atomic.AddUint64(&tableMetrics.Panics, 1) + } + close(res) + }() + if err := table.Resolver(ctx, client, parent, res); err != nil { + logger.Error().Err(err).Msg("table resolver finished with error") + atomic.AddUint64(&tableMetrics.Errors, 1) + if errors.As(err, &validationErr) { + sentry.WithScope(func(scope *sentry.Scope) { + scope.SetTag("table", table.Name) + sentry.CurrentHub().CaptureMessage(validationErr.MaskedError()) + }) + } + return + } + }() + + for r := range res { + p.resolveResourcesDfs(ctx, table, client, parent, r, resolvedResources, depth) + } + + // we don't need any waitgroups here because we are waiting for the channel to close + if parent == nil { // Log only for root tables and relations only after resolving is done, otherwise we spam per object instead of per table. + logger.Info().Uint64("resources", tableMetrics.Resources).Uint64("errors", tableMetrics.Errors).Msg("table sync finished") + p.logTablesMetrics(table.Relations, client) + } +} + +func (p *Plugin) resolveResourcesDfs(ctx context.Context, table *schema.Table, client schema.ClientMeta, parent *schema.Resource, resources any, resolvedResources chan<- *schema.Resource, depth int) { + resourcesSlice := helpers.InterfaceSlice(resources) + if len(resourcesSlice) == 0 { + return + } + resourcesChan := make(chan *schema.Resource, len(resourcesSlice)) + go func() { + defer close(resourcesChan) + var wg sync.WaitGroup + sentValidationErrors := sync.Map{} + for i := range resourcesSlice { + i := i + if err := p.resourceSem.Acquire(ctx, 1); err != nil { + p.logger.Warn().Err(err).Msg("failed to acquire semaphore. context cancelled") + wg.Wait() + // we have to continue emptying the channel to exit gracefully + return + } + wg.Add(1) + go func() { + defer p.resourceSem.Release(1) + defer wg.Done() + //nolint:all + resolvedResource := p.resolveResource(ctx, table, client, parent, resourcesSlice[i]) + if resolvedResource == nil { + return + } + + if err := resolvedResource.CalculateCQID(p.spec.SyncSpec.DetrministicCqId); err != nil { + tableMetrics := p.metrics.TableClient[table.Name][client.ID()] + p.logger.Error().Err(err).Str("table", table.Name).Str("client", client.ID()).Msg("resource resolver finished with primary key calculation error") + if _, found := sentValidationErrors.LoadOrStore(table.Name, struct{}{}); !found { + // send resource validation errors to Sentry only once per table, + // to avoid sending too many duplicate messages + sentry.WithScope(func(scope *sentry.Scope) { + scope.SetTag("table", table.Name) + sentry.CurrentHub().CaptureMessage(err.Error()) + }) + } + atomic.AddUint64(&tableMetrics.Errors, 1) + return + } + if err := resolvedResource.Validate(); err != nil { + tableMetrics := p.metrics.TableClient[table.Name][client.ID()] + p.logger.Error().Err(err).Str("table", table.Name).Str("client", client.ID()).Msg("resource resolver finished with validation error") + if _, found := sentValidationErrors.LoadOrStore(table.Name, struct{}{}); !found { + // send resource validation errors to Sentry only once per table, + // to avoid sending too many duplicate messages + sentry.WithScope(func(scope *sentry.Scope) { + scope.SetTag("table", table.Name) + sentry.CurrentHub().CaptureMessage(err.Error()) + }) + } + atomic.AddUint64(&tableMetrics.Errors, 1) + return + } + resourcesChan <- resolvedResource + }() + } + wg.Wait() + }() + + var wg sync.WaitGroup + for resource := range resourcesChan { + resource := resource + resolvedResources <- resource + for _, relation := range resource.Table.Relations { + relation := relation + if err := p.tableSems[depth].Acquire(ctx, 1); err != nil { + // This means context was cancelled + wg.Wait() + return + } + wg.Add(1) + go func() { + defer wg.Done() + defer p.tableSems[depth].Release(1) + p.resolveTableDfs(ctx, relation, client, resource, resolvedResources, depth+1) + }() + } + } + wg.Wait() +} diff --git a/plugin/scheduler_round_robin.go b/plugin/scheduler_round_robin.go new file mode 100644 index 0000000000..0554f5489e --- /dev/null +++ b/plugin/scheduler_round_robin.go @@ -0,0 +1,104 @@ +package plugin + +import ( + "context" + "sync" + + pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v0" + "github.com/cloudquery/plugin-sdk/v3/schema" + "golang.org/x/sync/semaphore" +) + +type tableClient struct { + table *schema.Table + client schema.ClientMeta +} + +func (p *Plugin) syncRoundRobin(ctx context.Context, spec pbPlugin.SyncSpec, client Client, tables schema.Tables, resolvedResources chan<- *schema.Resource) { + tableConcurrency := max(spec.Concurrency/minResourceConcurrency, minTableConcurrency) + resourceConcurrency := tableConcurrency * minResourceConcurrency + + p.tableSems = make([]*semaphore.Weighted, p.maxDepth) + for i := uint64(0); i < p.maxDepth; i++ { + p.tableSems[i] = semaphore.NewWeighted(int64(tableConcurrency)) + // reduce table concurrency logarithmically for every depth level + tableConcurrency = max(tableConcurrency/2, minTableConcurrency) + } + p.resourceSem = semaphore.NewWeighted(int64(resourceConcurrency)) + + // we have this because plugins can return sometimes clients in a random way which will cause + // differences between this run and the next one. + preInitialisedClients := make([][]schema.ClientMeta, len(tables)) + for i, table := range tables { + clients := []schema.ClientMeta{client.(schema.ClientMeta)} + if table.Multiplex != nil { + clients = table.Multiplex(client.(schema.ClientMeta)) + } + preInitialisedClients[i] = clients + // we do this here to avoid locks so we initial the metrics structure once in the main goroutines + // and then we can just read from it in the other goroutines concurrently given we are not writing to it. + p.metrics.initWithClients(table, clients) + } + + // We start a goroutine that logs the metrics periodically. + // It needs its own waitgroup + var logWg sync.WaitGroup + logWg.Add(1) + + logCtx, logCancel := context.WithCancel(ctx) + go p.periodicMetricLogger(logCtx, &logWg) + + tableClients := roundRobinInterleave(tables, preInitialisedClients) + + var wg sync.WaitGroup + for _, tc := range tableClients { + table := tc.table + cl := tc.client + if err := p.tableSems[0].Acquire(ctx, 1); err != nil { + // This means context was cancelled + wg.Wait() + // gracefully shut down the logger goroutine + logCancel() + logWg.Wait() + return + } + wg.Add(1) + go func() { + defer wg.Done() + defer p.tableSems[0].Release(1) + // not checking for error here as nothing much to do. + // the error is logged and this happens when context is cancelled + // Round Robin currently uses the DFS algorithm to resolve the tables, but this + // may change in the future. + p.resolveTableDfs(ctx, table, cl, nil, resolvedResources, 1) + }() + } + + // Wait for all the worker goroutines to finish + wg.Wait() + + // gracefully shut down the logger goroutine + logCancel() + logWg.Wait() +} + +// interleave table-clients so that we get: +// table1-client1, table2-client1, table3-client1, table1-client2, table2-client2, table3-client2, ... +func roundRobinInterleave(tables schema.Tables, preInitialisedClients [][]schema.ClientMeta) []tableClient { + tableClients := make([]tableClient, 0) + c := 0 + for { + addedNew := false + for i, table := range tables { + if c < len(preInitialisedClients[i]) { + tableClients = append(tableClients, tableClient{table: table, client: preInitialisedClients[i][c]}) + addedNew = true + } + } + c++ + if !addedNew { + break + } + } + return tableClients +} diff --git a/plugin/scheduler_round_robin_test.go b/plugin/scheduler_round_robin_test.go new file mode 100644 index 0000000000..daf7cc242f --- /dev/null +++ b/plugin/scheduler_round_robin_test.go @@ -0,0 +1,65 @@ +package plugin + +import ( + "testing" + + "github.com/cloudquery/plugin-sdk/v3/schema" +) + +func TestRoundRobinInterleave(t *testing.T) { + table1 := &schema.Table{Name: "test_table"} + table2 := &schema.Table{Name: "test_table2"} + client1 := &testExecutionClient{} + client2 := &testExecutionClient{} + client3 := &testExecutionClient{} + cases := []struct { + name string + tables schema.Tables + preInitialisedClients [][]schema.ClientMeta + want []tableClient + }{ + { + name: "single table", + tables: schema.Tables{table1}, + preInitialisedClients: [][]schema.ClientMeta{{client1}}, + want: []tableClient{{table: table1, client: client1}}, + }, + { + name: "two tables with different clients", + tables: schema.Tables{table1, table2}, + preInitialisedClients: [][]schema.ClientMeta{{client1}, {client1, client2}}, + want: []tableClient{ + {table: table1, client: client1}, + {table: table2, client: client1}, + {table: table2, client: client2}, + }, + }, + { + name: "two tables with different clients", + tables: schema.Tables{table1, table2}, + preInitialisedClients: [][]schema.ClientMeta{{client1, client3}, {client1, client2}}, + want: []tableClient{ + {table: table1, client: client1}, + {table: table2, client: client1}, + {table: table1, client: client3}, + {table: table2, client: client2}, + }, + }, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + got := roundRobinInterleave(tc.tables, tc.preInitialisedClients) + if len(got) != len(tc.want) { + t.Fatalf("got %d tableClients, want %d", len(got), len(tc.want)) + } + for i := range got { + if got[i].table != tc.want[i].table { + t.Errorf("got table %v, want %v", got[i].table, tc.want[i].table) + } + if got[i].client != tc.want[i].client { + t.Errorf("got client %v, want %v", got[i].client, tc.want[i].client) + } + } + }) + } +} diff --git a/plugin/templates/all_tables.md.go.tpl b/plugin/templates/all_tables.md.go.tpl new file mode 100644 index 0000000000..008afb66fd --- /dev/null +++ b/plugin/templates/all_tables.md.go.tpl @@ -0,0 +1,5 @@ +# Source Plugin: {{.PluginName}} +## Tables +{{- range $table := $.Tables }} +{{- template "all_tables_entry.md.go.tpl" $table}} +{{- end }} \ No newline at end of file diff --git a/plugin/templates/all_tables_entry.md.go.tpl b/plugin/templates/all_tables_entry.md.go.tpl new file mode 100644 index 0000000000..6166b1983b --- /dev/null +++ b/plugin/templates/all_tables_entry.md.go.tpl @@ -0,0 +1,5 @@ + +{{. | indentToDepth}}- [{{.Name}}]({{.Name}}.md){{ if .IsIncremental}} (Incremental){{ end }} +{{- range $index, $rel := .Relations}} +{{- template "all_tables_entry.md.go.tpl" $rel}} +{{- end}} \ No newline at end of file diff --git a/plugin/templates/table.md.go.tpl b/plugin/templates/table.md.go.tpl new file mode 100644 index 0000000000..21a8ed135e --- /dev/null +++ b/plugin/templates/table.md.go.tpl @@ -0,0 +1,44 @@ +# Table: {{$.Name}} + +This table shows data for {{.|title}}. + +{{ $.Description }} +{{ $length := len $.PrimaryKeys -}} +{{ if eq $length 1 }} +The primary key for this table is **{{ index $.PrimaryKeys 0 }}**. +{{ else }} +The composite primary key for this table is ({{ range $index, $pk := $.PrimaryKeys -}} + {{if $index }}, {{end -}} + **{{$pk}}** + {{- end -}}). +{{ end }} +{{- if $.IsIncremental -}} +It supports incremental syncs +{{- $ikLength := len $.IncrementalKeys -}} +{{- if eq $ikLength 1 }} based on the **{{ index $.IncrementalKeys 0 }}** column +{{- else if gt $ikLength 1 }} based on the ({{ range $index, $pk := $.IncrementalKeys -}} + {{- if $index -}}, {{end -}} + **{{$pk}}** + {{- end -}}) columns +{{- end -}}. +{{- end -}} + +{{- if or ($.Relations) ($.Parent) }} +## Relations +{{- end }} +{{- if $.Parent }} +This table depends on [{{ $.Parent.Name }}]({{ $.Parent.Name }}.md). +{{- end}} +{{ if $.Relations }} +The following tables depend on {{.Name}}: +{{- range $rel := $.Relations }} + - [{{ $rel.Name }}]({{ $rel.Name }}.md) +{{- end }} +{{- end }} + +## Columns +| Name | Type | +| ------------- | ------------- | +{{- range $column := $.Columns }} +|{{$column.Name}}{{if $column.PrimaryKey}} (PK){{end}}{{if $column.IncrementalKey}} (Incremental Key){{end}}|{{$column.Type}}| +{{- end }} \ No newline at end of file diff --git a/plugin/testdata/TestGeneratePluginDocs-JSON-__tables.json b/plugin/testdata/TestGeneratePluginDocs-JSON-__tables.json new file mode 100644 index 0000000000..7a8280833e --- /dev/null +++ b/plugin/testdata/TestGeneratePluginDocs-JSON-__tables.json @@ -0,0 +1,214 @@ +[ + { + "name": "incremental_table", + "title": "Incremental Table", + "description": "Description for incremental table", + "columns": [ + { + "name": "_cq_source_name", + "type": "utf8" + }, + { + "name": "_cq_sync_time", + "type": "timestamp[us, tz=UTC]" + }, + { + "name": "_cq_id", + "type": "uuid" + }, + { + "name": "_cq_parent_id", + "type": "uuid" + }, + { + "name": "int_col", + "type": "int64" + }, + { + "name": "id_col", + "type": "int64", + "is_primary_key": true, + "is_incremental_key": true + }, + { + "name": "id_col2", + "type": "int64", + "is_incremental_key": true + } + ], + "relations": [] + }, + { + "name": "test_table", + "title": "Test Table", + "description": "Description for test table", + "columns": [ + { + "name": "_cq_source_name", + "type": "utf8" + }, + { + "name": "_cq_sync_time", + "type": "timestamp[us, tz=UTC]" + }, + { + "name": "_cq_id", + "type": "uuid" + }, + { + "name": "_cq_parent_id", + "type": "uuid" + }, + { + "name": "int_col", + "type": "int64" + }, + { + "name": "id_col", + "type": "int64", + "is_primary_key": true + }, + { + "name": "id_col2", + "type": "int64", + "is_primary_key": true + }, + { + "name": "json_col", + "type": "json" + }, + { + "name": "list_col", + "type": "list" + }, + { + "name": "map_col", + "type": "map" + }, + { + "name": "struct_col", + "type": "struct" + } + ], + "relations": [ + { + "name": "relation_table", + "title": "Relation Table", + "description": "Description for relational table", + "columns": [ + { + "name": "_cq_source_name", + "type": "utf8" + }, + { + "name": "_cq_sync_time", + "type": "timestamp[us, tz=UTC]" + }, + { + "name": "_cq_id", + "type": "uuid", + "is_primary_key": true + }, + { + "name": "_cq_parent_id", + "type": "uuid" + }, + { + "name": "string_col", + "type": "utf8" + } + ], + "relations": [ + { + "name": "relation_relation_table_a", + "title": "Relation Relation Table A", + "description": "Description for relational table's relation", + "columns": [ + { + "name": "_cq_source_name", + "type": "utf8" + }, + { + "name": "_cq_sync_time", + "type": "timestamp[us, tz=UTC]" + }, + { + "name": "_cq_id", + "type": "uuid", + "is_primary_key": true + }, + { + "name": "_cq_parent_id", + "type": "uuid" + }, + { + "name": "string_col", + "type": "utf8" + } + ], + "relations": [] + }, + { + "name": "relation_relation_table_b", + "title": "Relation Relation Table B", + "description": "Description for relational table's relation", + "columns": [ + { + "name": "_cq_source_name", + "type": "utf8" + }, + { + "name": "_cq_sync_time", + "type": "timestamp[us, tz=UTC]" + }, + { + "name": "_cq_id", + "type": "uuid", + "is_primary_key": true + }, + { + "name": "_cq_parent_id", + "type": "uuid" + }, + { + "name": "string_col", + "type": "utf8" + } + ], + "relations": [] + } + ] + }, + { + "name": "relation_table2", + "title": "Relation Table2", + "description": "Description for second relational table", + "columns": [ + { + "name": "_cq_source_name", + "type": "utf8" + }, + { + "name": "_cq_sync_time", + "type": "timestamp[us, tz=UTC]" + }, + { + "name": "_cq_id", + "type": "uuid", + "is_primary_key": true + }, + { + "name": "_cq_parent_id", + "type": "uuid" + }, + { + "name": "string_col", + "type": "utf8" + } + ], + "relations": [] + } + ] + } +] + diff --git a/plugin/testdata/TestGeneratePluginDocs-Markdown-README.md b/plugin/testdata/TestGeneratePluginDocs-Markdown-README.md new file mode 100644 index 0000000000..9480a0598a --- /dev/null +++ b/plugin/testdata/TestGeneratePluginDocs-Markdown-README.md @@ -0,0 +1,10 @@ +# Source Plugin: test + +## Tables + +- [incremental_table](incremental_table.md) (Incremental) +- [test_table](test_table.md) + - [relation_table](relation_table.md) + - [relation_relation_table_a](relation_relation_table_a.md) + - [relation_relation_table_b](relation_relation_table_b.md) + - [relation_table2](relation_table2.md) diff --git a/plugin/testdata/TestGeneratePluginDocs-Markdown-incremental_table.md b/plugin/testdata/TestGeneratePluginDocs-Markdown-incremental_table.md new file mode 100644 index 0000000000..67ca4b8539 --- /dev/null +++ b/plugin/testdata/TestGeneratePluginDocs-Markdown-incremental_table.md @@ -0,0 +1,20 @@ +# Table: incremental_table + +This table shows data for Incremental Table. + +Description for incremental table + +The primary key for this table is **id_col**. +It supports incremental syncs based on the (**id_col**, **id_col2**) columns. + +## Columns + +| Name | Type | +| ------------- | ------------- | +|_cq_source_name|utf8| +|_cq_sync_time|timestamp[us, tz=UTC]| +|_cq_id|uuid| +|_cq_parent_id|uuid| +|int_col|int64| +|id_col (PK) (Incremental Key)|int64| +|id_col2 (Incremental Key)|int64| diff --git a/plugin/testdata/TestGeneratePluginDocs-Markdown-relation_relation_table_a.md b/plugin/testdata/TestGeneratePluginDocs-Markdown-relation_relation_table_a.md new file mode 100644 index 0000000000..038791b13e --- /dev/null +++ b/plugin/testdata/TestGeneratePluginDocs-Markdown-relation_relation_table_a.md @@ -0,0 +1,21 @@ +# Table: relation_relation_table_a + +This table shows data for Relation Relation Table A. + +Description for relational table's relation + +The primary key for this table is **_cq_id**. + +## Relations + +This table depends on [relation_table](relation_table.md). + +## Columns + +| Name | Type | +| ------------- | ------------- | +|_cq_source_name|utf8| +|_cq_sync_time|timestamp[us, tz=UTC]| +|_cq_id (PK)|uuid| +|_cq_parent_id|uuid| +|string_col|utf8| diff --git a/plugin/testdata/TestGeneratePluginDocs-Markdown-relation_relation_table_b.md b/plugin/testdata/TestGeneratePluginDocs-Markdown-relation_relation_table_b.md new file mode 100644 index 0000000000..432f6533f8 --- /dev/null +++ b/plugin/testdata/TestGeneratePluginDocs-Markdown-relation_relation_table_b.md @@ -0,0 +1,21 @@ +# Table: relation_relation_table_b + +This table shows data for Relation Relation Table B. + +Description for relational table's relation + +The primary key for this table is **_cq_id**. + +## Relations + +This table depends on [relation_table](relation_table.md). + +## Columns + +| Name | Type | +| ------------- | ------------- | +|_cq_source_name|utf8| +|_cq_sync_time|timestamp[us, tz=UTC]| +|_cq_id (PK)|uuid| +|_cq_parent_id|uuid| +|string_col|utf8| diff --git a/plugin/testdata/TestGeneratePluginDocs-Markdown-relation_table.md b/plugin/testdata/TestGeneratePluginDocs-Markdown-relation_table.md new file mode 100644 index 0000000000..7db8baff7e --- /dev/null +++ b/plugin/testdata/TestGeneratePluginDocs-Markdown-relation_table.md @@ -0,0 +1,25 @@ +# Table: relation_table + +This table shows data for Relation Table. + +Description for relational table + +The primary key for this table is **_cq_id**. + +## Relations + +This table depends on [test_table](test_table.md). + +The following tables depend on relation_table: + - [relation_relation_table_a](relation_relation_table_a.md) + - [relation_relation_table_b](relation_relation_table_b.md) + +## Columns + +| Name | Type | +| ------------- | ------------- | +|_cq_source_name|utf8| +|_cq_sync_time|timestamp[us, tz=UTC]| +|_cq_id (PK)|uuid| +|_cq_parent_id|uuid| +|string_col|utf8| diff --git a/plugin/testdata/TestGeneratePluginDocs-Markdown-test_table.md b/plugin/testdata/TestGeneratePluginDocs-Markdown-test_table.md new file mode 100644 index 0000000000..f0c91578a5 --- /dev/null +++ b/plugin/testdata/TestGeneratePluginDocs-Markdown-test_table.md @@ -0,0 +1,29 @@ +# Table: test_table + +This table shows data for Test Table. + +Description for test table + +The composite primary key for this table is (**id_col**, **id_col2**). + +## Relations + +The following tables depend on test_table: + - [relation_table](relation_table.md) + - [relation_table2](relation_table2.md) + +## Columns + +| Name | Type | +| ------------- | ------------- | +|_cq_source_name|utf8| +|_cq_sync_time|timestamp[us, tz=UTC]| +|_cq_id|uuid| +|_cq_parent_id|uuid| +|int_col|int64| +|id_col (PK)|int64| +|id_col2 (PK)|int64| +|json_col|json| +|list_col|list| +|map_col|map| +|struct_col|struct| diff --git a/plugin/testing.go b/plugin/testing.go new file mode 100644 index 0000000000..562da87461 --- /dev/null +++ b/plugin/testing.go @@ -0,0 +1,141 @@ +package plugin + +import ( + "context" + "testing" + "time" + + pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v0" + "github.com/cloudquery/plugin-sdk/v3/schema" +) + +type Validator func(t *testing.T, plugin *Plugin, resources []*schema.Resource) + +func TestPluginSync(t *testing.T, plugin *Plugin, spec pbPlugin.Spec, opts ...TestPluginOption) { + t.Helper() + + o := &testPluginOptions{ + parallel: true, + validators: []Validator{validatePlugin}, + } + for _, opt := range opts { + opt(o) + } + if o.parallel { + t.Parallel() + } + + resourcesChannel := make(chan *schema.Resource) + var syncErr error + + if err := plugin.Init(context.Background(), spec); err != nil { + t.Fatal(err) + } + + go func() { + defer close(resourcesChannel) + syncErr = plugin.Sync(context.Background(), time.Now(), *spec.SyncSpec, resourcesChannel) + }() + + syncedResources := make([]*schema.Resource, 0) + for resource := range resourcesChannel { + syncedResources = append(syncedResources, resource) + } + if syncErr != nil { + t.Fatal(syncErr) + } + for _, validator := range o.validators { + validator(t, plugin, syncedResources) + } +} + +type TestPluginOption func(*testPluginOptions) + +func WithTestPluginNoParallel() TestPluginOption { + return func(f *testPluginOptions) { + f.parallel = false + } +} + +func WithTestPluginAdditionalValidators(v Validator) TestPluginOption { + return func(f *testPluginOptions) { + f.validators = append(f.validators, v) + } +} + +type testPluginOptions struct { + parallel bool + validators []Validator +} + +func getTableResources(t *testing.T, table *schema.Table, resources []*schema.Resource) []*schema.Resource { + t.Helper() + + tableResources := make([]*schema.Resource, 0) + + for _, resource := range resources { + if resource.Table.Name == table.Name { + tableResources = append(tableResources, resource) + } + } + + return tableResources +} + +func validateTable(t *testing.T, table *schema.Table, resources []*schema.Resource) { + t.Helper() + tableResources := getTableResources(t, table, resources) + if len(tableResources) == 0 { + t.Errorf("Expected table %s to be synced but it was not found", table.Name) + return + } + validateResources(t, tableResources) +} + +func validatePlugin(t *testing.T, plugin *Plugin, resources []*schema.Resource) { + t.Helper() + tables := extractTables(plugin.staticTables) + for _, table := range tables { + validateTable(t, table, resources) + } +} + +func extractTables(tables schema.Tables) []*schema.Table { + result := make([]*schema.Table, 0) + for _, table := range tables { + result = append(result, table) + result = append(result, extractTables(table.Relations)...) + } + return result +} + +// Validates that every column has at least one non-nil value. +// Also does some additional validations. +func validateResources(t *testing.T, resources []*schema.Resource) { + t.Helper() + + table := resources[0].Table + + // A set of column-names that have values in at least one of the resources. + columnsWithValues := make([]bool, len(table.Columns)) + + for _, resource := range resources { + for i, value := range resource.GetValues() { + if value == nil { + continue + } + if value.IsValid() { + columnsWithValues[i] = true + } + } + } + + // Make sure every column has at least one value. + for i, hasValue := range columnsWithValues { + col := table.Columns[i] + emptyExpected := col.Name == "_cq_parent_id" && table.Parent == nil + if !hasValue && !emptyExpected && !col.IgnoreInTests { + t.Errorf("table: %s column %s has no values", table.Name, table.Columns[i].Name) + } + } +} diff --git a/plugin/validate.go b/plugin/validate.go new file mode 100644 index 0000000000..0b21133b05 --- /dev/null +++ b/plugin/validate.go @@ -0,0 +1,27 @@ +package plugin + +import ( + "fmt" + + "github.com/cloudquery/plugin-sdk/v3/schema" +) + +func (p *Plugin) validate(tables schema.Tables) error { + if err := tables.ValidateDuplicateColumns(); err != nil { + return fmt.Errorf("found duplicate columns in source plugin: %s: %w", p.name, err) + } + + if err := tables.ValidateDuplicateTables(); err != nil { + return fmt.Errorf("found duplicate tables in source plugin: %s: %w", p.name, err) + } + + if err := tables.ValidateTableNames(); err != nil { + return fmt.Errorf("found table with invalid name in source plugin: %s: %w", p.name, err) + } + + if err := tables.ValidateColumnNames(); err != nil { + return fmt.Errorf("found column with invalid name in source plugin: %s: %w", p.name, err) + } + + return nil +} diff --git a/serve/plugin.go b/serve/plugin.go new file mode 100644 index 0000000000..b37be8513c --- /dev/null +++ b/serve/plugin.go @@ -0,0 +1,235 @@ +package serve + +import ( + "fmt" + "net" + "os" + "os/signal" + "strings" + "sync" + "syscall" + + "github.com/cloudquery/plugin-sdk/v3/plugin" + + pbdiscoveryv0 "github.com/cloudquery/plugin-pb-go/pb/discovery/v0" + pbv0 "github.com/cloudquery/plugin-pb-go/pb/plugin/v0" + discoveryServerV0 "github.com/cloudquery/plugin-sdk/v3/internal/servers/discovery/v0" + + serversv0 "github.com/cloudquery/plugin-sdk/v3/internal/servers/plugin/v0" + "github.com/getsentry/sentry-go" + grpczerolog "github.com/grpc-ecosystem/go-grpc-middleware/providers/zerolog/v2" + "github.com/grpc-ecosystem/go-grpc-middleware/v2/interceptors/logging" + "github.com/rs/zerolog" + "github.com/rs/zerolog/log" + "github.com/spf13/cobra" + "github.com/thoas/go-funk" + "golang.org/x/net/netutil" + "google.golang.org/grpc" + "google.golang.org/grpc/test/bufconn" +) + +type pluginServe struct { + plugin *plugin.Plugin + sentryDSN string +} + +type PluginOption func(*pluginServe) + +func WithPluginSentryDSN(dsn string) PluginOption { + return func(s *pluginServe) { + s.sentryDSN = dsn + } +} + +// lis used for unit testing grpc server and client +var testPluginListener *bufconn.Listener +var testPluginListenerLock sync.Mutex + +const servePluginShort = `Start plugin server` + +func Plugin(plugin *plugin.Plugin, opts ...PluginOption) { + s := &pluginServe{ + plugin: plugin, + } + for _, opt := range opts { + opt(s) + } + if err := newCmdPluginRoot(s).Execute(); err != nil { + sentry.CaptureMessage(err.Error()) + fmt.Println(err) + os.Exit(1) + } +} + +// nolint:dupl +func newCmdPluginServe(serve *pluginServe) *cobra.Command { + var address string + var network string + var noSentry bool + logLevel := newEnum([]string{"trace", "debug", "info", "warn", "error"}, "info") + logFormat := newEnum([]string{"text", "json"}, "text") + telemetryLevel := newEnum([]string{"none", "errors", "stats", "all"}, "all") + err := telemetryLevel.Set(getEnvOrDefault("CQ_TELEMETRY_LEVEL", telemetryLevel.Value)) + if err != nil { + fmt.Fprintf(os.Stderr, "failed to set telemetry level: "+err.Error()) + os.Exit(1) + } + + cmd := &cobra.Command{ + Use: "serve", + Short: serveSourceShort, + Long: serveSourceShort, + Args: cobra.NoArgs, + RunE: func(cmd *cobra.Command, args []string) error { + zerologLevel, err := zerolog.ParseLevel(logLevel.String()) + if err != nil { + return err + } + var logger zerolog.Logger + if logFormat.String() == "json" { + logger = zerolog.New(os.Stdout).Level(zerologLevel) + } else { + logger = log.Output(zerolog.ConsoleWriter{Out: os.Stdout}).Level(zerologLevel) + } + + // opts.Plugin.Logger = logger + var listener net.Listener + if network == "test" { + testSourceListenerLock.Lock() + listener = bufconn.Listen(testBufSize) + testSourceListener = listener.(*bufconn.Listener) + testSourceListenerLock.Unlock() + } else { + listener, err = net.Listen(network, address) + if err != nil { + return fmt.Errorf("failed to listen %s:%s: %w", network, address, err) + } + } + // source plugins can only accept one connection at a time + // unlike destination plugins that can accept multiple connections + limitListener := netutil.LimitListener(listener, 1) + // See logging pattern https://github.com/grpc-ecosystem/go-grpc-middleware/blob/v2/providers/zerolog/examples_test.go + s := grpc.NewServer( + grpc.ChainUnaryInterceptor( + logging.UnaryServerInterceptor(grpczerolog.InterceptorLogger(logger)), + ), + grpc.ChainStreamInterceptor( + logging.StreamServerInterceptor(grpczerolog.InterceptorLogger(logger)), + ), + grpc.MaxRecvMsgSize(MaxMsgSize), + grpc.MaxSendMsgSize(MaxMsgSize), + ) + serve.plugin.SetLogger(logger) + pbv0.RegisterPluginServer(s, &serversv0.Server{ + Plugin: serve.plugin, + Logger: logger, + }) + pbdiscoveryv0.RegisterDiscoveryServer(s, &discoveryServerV0.Server{ + Versions: []string{"v2"}, + }) + + version := serve.plugin.Version() + + if serve.sentryDSN != "" && !strings.EqualFold(version, "development") && !noSentry { + err = sentry.Init(sentry.ClientOptions{ + Dsn: serve.sentryDSN, + Debug: false, + AttachStacktrace: false, + Release: version, + Transport: sentry.NewHTTPSyncTransport(), + ServerName: "oss", // set to "oss" on purpose to avoid sending any identifying information + // https://docs.sentry.io/platforms/go/configuration/options/#removing-default-integrations + Integrations: func(integrations []sentry.Integration) []sentry.Integration { + var filteredIntegrations []sentry.Integration + for _, integration := range integrations { + if integration.Name() == "Modules" { + continue + } + filteredIntegrations = append(filteredIntegrations, integration) + } + return filteredIntegrations + }, + }) + if err != nil { + log.Error().Err(err).Msg("Error initializing sentry") + } + } + + ctx := cmd.Context() + c := make(chan os.Signal, 1) + signal.Notify(c, os.Interrupt, syscall.SIGTERM) + defer func() { + signal.Stop(c) + }() + + go func() { + select { + case sig := <-c: + logger.Info().Str("address", listener.Addr().String()).Str("signal", sig.String()).Msg("Got stop signal. Source plugin server shutting down") + s.Stop() + case <-ctx.Done(): + logger.Info().Str("address", listener.Addr().String()).Msg("Context cancelled. Source plugin server shutting down") + s.Stop() + } + }() + + logger.Info().Str("address", listener.Addr().String()).Msg("Source plugin server listening") + if err := s.Serve(limitListener); err != nil { + return fmt.Errorf("failed to serve: %w", err) + } + return nil + }, + } + cmd.Flags().StringVar(&address, "address", "localhost:7777", "address to serve on. can be tcp: `localhost:7777` or unix socket: `/tmp/plugin.rpc.sock`") + cmd.Flags().StringVar(&network, "network", "tcp", `the network must be "tcp", "tcp4", "tcp6", "unix" or "unixpacket"`) + cmd.Flags().Var(logLevel, "log-level", fmt.Sprintf("log level. one of: %s", strings.Join(logLevel.Allowed, ","))) + cmd.Flags().Var(logFormat, "log-format", fmt.Sprintf("log format. one of: %s", strings.Join(logFormat.Allowed, ","))) + cmd.Flags().BoolVar(&noSentry, "no-sentry", false, "disable sentry") + sendErrors := funk.ContainsString([]string{"all", "errors"}, telemetryLevel.String()) + if !sendErrors { + noSentry = true + } + + return cmd +} + +const ( + pluginDocShort = "Generate documentation for tables" + pluginDocLong = `Generate documentation for tables + +If format is markdown, a destination directory will be created (if necessary) containing markdown files. +Example: +doc ./output + +If format is JSON, a destination directory will be created (if necessary) with a single json file called __tables.json. +Example: +doc --format json . +` +) + +func newCmdPluginDoc(serve *pluginServe) *cobra.Command { + format := newEnum([]string{"json", "markdown"}, "markdown") + cmd := &cobra.Command{ + Use: "doc ", + Short: sourceDocShort, + Long: sourceDocLong, + Args: cobra.ExactArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + pbFormat := pbv0.GenDocs_FORMAT(pbv0.GenDocs_FORMAT_value[format.Value]) + return serve.plugin.GeneratePluginDocs(serve.plugin.StaticTables(), args[0], pbFormat) + }, + } + cmd.Flags().Var(format, "format", fmt.Sprintf("output format. one of: %s", strings.Join(format.Allowed, ","))) + return cmd +} + +func newCmdPluginRoot(serve *pluginServe) *cobra.Command { + cmd := &cobra.Command{ + Use: fmt.Sprintf("%s ", serve.plugin.Name()), + } + cmd.AddCommand(newCmdPluginServe(serve)) + cmd.AddCommand(newCmdPluginDoc(serve)) + cmd.CompletionOptions.DisableDefaultCmd = true + cmd.Version = serve.plugin.Version() + return cmd +} diff --git a/serve/plugin_test.go b/serve/plugin_test.go new file mode 100644 index 0000000000..8a541611e9 --- /dev/null +++ b/serve/plugin_test.go @@ -0,0 +1,238 @@ +package serve + +import ( + "bytes" + "context" + "encoding/json" + "io" + "net" + "sync" + "testing" + "time" + + "github.com/apache/arrow/go/v13/arrow" + "github.com/apache/arrow/go/v13/arrow/ipc" + pb "github.com/cloudquery/plugin-pb-go/pb/source/v2" + "github.com/cloudquery/plugin-pb-go/specs" + "github.com/cloudquery/plugin-sdk/v3/plugins/source" + "github.com/cloudquery/plugin-sdk/v3/schema" + "github.com/rs/zerolog" + "google.golang.org/grpc" + "google.golang.org/grpc/credentials/insecure" +) + +type TestSourcePluginSpec struct { + Accounts []string `json:"accounts,omitempty" yaml:"accounts,omitempty"` +} + +type testExecutionClient struct{} + +var _ schema.ClientMeta = &testExecutionClient{} + +// var errTestExecutionClientErr = fmt.Errorf("error in newTestExecutionClientErr") + +func testTable(name string) *schema.Table { + return &schema.Table{ + Name: name, + Resolver: func(ctx context.Context, meta schema.ClientMeta, parent *schema.Resource, res chan<- any) error { + res <- map[string]any{ + "TestColumn": 3, + } + return nil + }, + Columns: []schema.Column{ + { + Name: "test_column", + Type: arrow.PrimitiveTypes.Int64, + }, + }, + } +} + +func (*testExecutionClient) ID() string { + return "testExecutionClient" +} + +func newTestExecutionClient(context.Context, zerolog.Logger, specs.Source, source.Options) (schema.ClientMeta, error) { + return &testExecutionClient{}, nil +} + +func bufSourceDialer(context.Context, string) (net.Conn, error) { + testSourceListenerLock.Lock() + defer testSourceListenerLock.Unlock() + return testSourceListener.Dial() +} + +func TestSourceSuccess(t *testing.T) { + plugin := source.NewPlugin( + "testPlugin", + "v1.0.0", + []*schema.Table{testTable("test_table"), testTable("test_table2")}, + newTestExecutionClient) + + cmd := newCmdSourceRoot(&sourceServe{ + plugin: plugin, + }) + cmd.SetArgs([]string{"serve", "--network", "test"}) + ctx := context.Background() + ctx, cancel := context.WithCancel(ctx) + var wg sync.WaitGroup + wg.Add(1) + var serverErr error + go func() { + defer wg.Done() + serverErr = cmd.ExecuteContext(ctx) + }() + defer func() { + cancel() + wg.Wait() + }() + for { + testSourceListenerLock.Lock() + if testSourceListener != nil { + testSourceListenerLock.Unlock() + break + } + testSourceListenerLock.Unlock() + t.Log("waiting for grpc server to start") + time.Sleep(time.Millisecond * 200) + } + + // https://stackoverflow.com/questions/42102496/testing-a-grpc-service + conn, err := grpc.DialContext(ctx, "bufnet", grpc.WithContextDialer(bufSourceDialer), grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithBlock()) + if err != nil { + t.Fatalf("Failed to dial bufnet: %v", err) + } + c := pb.NewSourceClient(conn) + + getNameRes, err := c.GetName(ctx, &pb.GetName_Request{}) + if err != nil { + t.Fatal(err) + } + if getNameRes.Name != "testPlugin" { + t.Fatalf("expected name to be testPlugin but got %s", getNameRes.Name) + } + + getVersionResponse, err := c.GetVersion(ctx, &pb.GetVersion_Request{}) + if err != nil { + t.Fatal(err) + } + if getVersionResponse.Version != "v1.0.0" { + t.Fatalf("Expected version to be v1.0.0 but got %s", getVersionResponse.Version) + } + + spec := specs.Source{ + Name: "testSourcePlugin", + Version: "v1.0.0", + Path: "cloudquery/testSourcePlugin", + Registry: specs.RegistryGithub, + Tables: []string{"test_table"}, + Spec: TestSourcePluginSpec{Accounts: []string{"cloudquery/plugin-sdk"}}, + Destinations: []string{"test"}, + } + specMarshaled, err := json.Marshal(spec) + if err != nil { + t.Fatalf("Failed to marshal spec: %v", err) + } + + getTablesRes, err := c.GetTables(ctx, &pb.GetTables_Request{}) + if err != nil { + t.Fatal(err) + } + + tables, err := schema.NewTablesFromBytes(getTablesRes.Tables) + if err != nil { + t.Fatal(err) + } + + if len(tables) != 2 { + t.Fatalf("Expected 2 tables but got %d", len(tables)) + } + if _, err := c.Init(ctx, &pb.Init_Request{Spec: specMarshaled}); err != nil { + t.Fatal(err) + } + + getTablesForSpecRes, err := c.GetDynamicTables(ctx, &pb.GetDynamicTables_Request{}) + if err != nil { + t.Fatal(err) + } + tables, err = schema.NewTablesFromBytes(getTablesForSpecRes.Tables) + if err != nil { + t.Fatal(err) + } + + if len(tables) != 1 { + t.Fatalf("Expected 1 table but got %d", len(tables)) + } + + syncClient, err := c.Sync(ctx, &pb.Sync_Request{}) + if err != nil { + t.Fatal(err) + } + var resources []arrow.Record + for { + r, err := syncClient.Recv() + if err == io.EOF { + break + } + if err != nil { + t.Fatal(err) + } + rdr, err := ipc.NewReader(bytes.NewReader(r.Resource)) + if err != nil { + t.Fatal(err) + } + for rdr.Next() { + rec := rdr.Record() + rec.Retain() + resources = append(resources, rec) + } + } + + totalResources := 0 + for _, resource := range resources { + sc := resource.Schema() + tableName, ok := sc.Metadata().GetValue(schema.MetadataTableName) + if !ok { + t.Fatal("Expected table name metadata to be set") + } + if tableName != "test_table" { + t.Fatalf("Expected resource with table name test_table. got: %s", tableName) + } + if len(resource.Columns()) != 5 { + t.Fatalf("Expected resource with data length 3 but got %d", len(resource.Columns())) + } + totalResources++ + } + if totalResources != 1 { + t.Fatalf("Expected 1 resource on channel but got %d", totalResources) + } + + getMetricsRes, err := c.GetMetrics(ctx, &pb.GetMetrics_Request{}) + if err != nil { + t.Fatal(err) + } + var stats source.Metrics + if err := json.Unmarshal(getMetricsRes.Metrics, &stats); err != nil { + t.Fatal(err) + } + + clientStats := stats.TableClient[""][""] + if clientStats.Resources != 1 { + t.Fatalf("Expected 1 resource but got %d", clientStats.Resources) + } + + if clientStats.Errors != 0 { + t.Fatalf("Expected 0 errors but got %d", clientStats.Errors) + } + + if clientStats.Panics != 0 { + t.Fatalf("Expected 0 panics but got %d", clientStats.Panics) + } + + cancel() + wg.Wait() + if serverErr != nil { + t.Fatal(serverErr) + } +} From 1dd18d53986051daa1bfef4bbab28bd853476a7f Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Tue, 30 May 2023 20:53:37 +0300 Subject: [PATCH 002/125] wip --- go.mod | 7 +- go.sum | 2 + internal/memdb/memdb.go | 41 +- internal/memdb/memdb_test.go | 128 ++--- internal/pk/pk.go | 2 +- .../servers/destination/v0/destinations.go | 12 +- .../servers/destination/v0/schemav2tov3.go | 4 +- internal/servers/destination/v0/specv3tov1.go | 77 +++ .../servers/destination/v1/destinations.go | 12 +- internal/servers/destination/v1/specv3tov1.go | 77 +++ internal/servers/plugin/{v0 => v3}/plugin.go | 48 +- internal/servers/source/v2/source.go | 173 ------- ...hmark_test.go => benchmark_test.go.backup} | 2 +- {plugins/destination => plugin}/diff.go | 0 plugin/docs.go | 6 +- plugin/{docs_test.go => docs_test.go.backup} | 4 +- .../destination => plugin}/managed_writer.go | 15 +- plugin/metrics.go | 2 +- {plugins/destination => plugin}/nulls.go | 6 +- plugin/options.go | 33 +- plugin/plugin.go | 191 +++++-- .../plugin_managed_source_test.go | 142 +++--- plugin/plugin_round_robin_test.go | 102 +++- plugin/plugin_test.go | 470 ------------------ plugin/scheduler.go | 2 +- plugin/scheduler_dfs.go | 6 +- plugin/scheduler_round_robin.go | 4 +- plugin/scheduler_round_robin_test.go | 2 +- .../testing_overwrite_deletestale.go | 21 +- plugin/{testing.go => testing_sync.go} | 42 +- .../testing_write.go | 38 +- .../testing_write_append.go | 17 +- .../testing_write_migrate.go | 39 +- .../testing_write_overwrite.go | 19 +- plugin/validate.go | 2 +- plugins/destination/metrics.go | 8 - plugins/destination/plugin.go | 314 ------------ plugins/destination/unmanaged_writer.go | 14 - plugins/docs.go | 2 - plugins/source/benchmark_test.go | 429 ---------------- plugins/source/docs.go | 241 --------- plugins/source/docs_test.go | 164 ------ plugins/source/metrics.go | 207 -------- plugins/source/metrics_test.go | 186 ------- plugins/source/options.go | 39 -- plugins/source/plugin.go | 345 ------------- plugins/source/scheduler.go | 177 ------- plugins/source/scheduler_dfs.go | 234 --------- plugins/source/scheduler_round_robin.go | 104 ---- plugins/source/scheduler_round_robin_test.go | 65 --- plugins/source/templates/all_tables.md.go.tpl | 5 - .../templates/all_tables_entry.md.go.tpl | 5 - plugins/source/templates/table.md.go.tpl | 44 -- .../TestGeneratePluginDocs-JSON-__tables.json | 214 -------- .../TestGeneratePluginDocs-Markdown-README.md | 10 - ...tePluginDocs-Markdown-incremental_table.md | 20 - ...Docs-Markdown-relation_relation_table_a.md | 21 - ...Docs-Markdown-relation_relation_table_b.md | 21 - ...eratePluginDocs-Markdown-relation_table.md | 25 - ...tGeneratePluginDocs-Markdown-test_table.md | 29 -- plugins/source/testing.go | 141 ------ plugins/source/validate.go | 25 - scalar/inet.go | 2 +- scalar/json.go | 2 +- scalar/mac.go | 2 +- scalar/scalar.go | 12 +- scalar/uuid.go | 2 +- schema/meta.go | 4 +- schema/resource.go | 2 +- schema/table.go | 2 +- schema/testdata.go | 3 +- serve/destination.go | 209 -------- serve/destination_v0_test.go | 32 +- serve/destination_v1_test.go | 21 +- serve/plugin.go | 51 +- serve/plugin_test.go | 68 +-- serve/source.go | 233 --------- serve/source_v2_test.go | 238 --------- transformers/struct.go | 6 +- transformers/struct_test.go | 4 +- 80 files changed, 908 insertions(+), 4822 deletions(-) create mode 100644 internal/servers/destination/v0/specv3tov1.go create mode 100644 internal/servers/destination/v1/specv3tov1.go rename internal/servers/plugin/{v0 => v3}/plugin.go (87%) delete mode 100644 internal/servers/source/v2/source.go rename plugin/{benchmark_test.go => benchmark_test.go.backup} (99%) rename {plugins/destination => plugin}/diff.go (100%) rename plugin/{docs_test.go => docs_test.go.backup} (97%) rename {plugins/destination => plugin}/managed_writer.go (92%) rename {plugins/destination => plugin}/nulls.go (94%) rename plugins/source/plugin_test.go => plugin/plugin_managed_source_test.go (76%) delete mode 100644 plugin/plugin_test.go rename plugins/destination/plugin_testing_overwrite_delete_stale.go => plugin/testing_overwrite_deletestale.go (91%) rename plugin/{testing.go => testing_sync.go} (73%) rename plugins/destination/plugin_testing.go => plugin/testing_write.go (88%) rename plugins/destination/plugin_testing_write_append.go => plugin/testing_write_append.go (85%) rename plugins/destination/plugin_testing_migrate.go => plugin/testing_write_migrate.go (85%) rename plugins/destination/plugin_testing_overwrite.go => plugin/testing_write_overwrite.go (87%) delete mode 100644 plugins/destination/metrics.go delete mode 100644 plugins/destination/plugin.go delete mode 100644 plugins/destination/unmanaged_writer.go delete mode 100644 plugins/docs.go delete mode 100644 plugins/source/benchmark_test.go delete mode 100644 plugins/source/docs.go delete mode 100644 plugins/source/docs_test.go delete mode 100644 plugins/source/metrics.go delete mode 100644 plugins/source/metrics_test.go delete mode 100644 plugins/source/options.go delete mode 100644 plugins/source/plugin.go delete mode 100644 plugins/source/scheduler.go delete mode 100644 plugins/source/scheduler_dfs.go delete mode 100644 plugins/source/scheduler_round_robin.go delete mode 100644 plugins/source/scheduler_round_robin_test.go delete mode 100644 plugins/source/templates/all_tables.md.go.tpl delete mode 100644 plugins/source/templates/all_tables_entry.md.go.tpl delete mode 100644 plugins/source/templates/table.md.go.tpl delete mode 100644 plugins/source/testdata/TestGeneratePluginDocs-JSON-__tables.json delete mode 100644 plugins/source/testdata/TestGeneratePluginDocs-Markdown-README.md delete mode 100644 plugins/source/testdata/TestGeneratePluginDocs-Markdown-incremental_table.md delete mode 100644 plugins/source/testdata/TestGeneratePluginDocs-Markdown-relation_relation_table_a.md delete mode 100644 plugins/source/testdata/TestGeneratePluginDocs-Markdown-relation_relation_table_b.md delete mode 100644 plugins/source/testdata/TestGeneratePluginDocs-Markdown-relation_table.md delete mode 100644 plugins/source/testdata/TestGeneratePluginDocs-Markdown-test_table.md delete mode 100644 plugins/source/testing.go delete mode 100644 plugins/source/validate.go delete mode 100644 serve/destination.go delete mode 100644 serve/source.go delete mode 100644 serve/source_v2_test.go diff --git a/go.mod b/go.mod index d303bddb50..082be87eb7 100644 --- a/go.mod +++ b/go.mod @@ -1,9 +1,9 @@ -module github.com/cloudquery/plugin-sdk/v3 +module github.com/cloudquery/plugin-sdk/v4 go 1.19 require ( - github.com/apache/arrow/go/v13 v13.0.0-20230531201200-cbc17a98dfd9 + github.com/apache/arrow/go/v13 v13.0.0-20230525142029-2d32efeedad8 github.com/bradleyjkemp/cupaloy/v2 v2.8.0 github.com/cloudquery/plugin-pb-go v1.1.0 github.com/cloudquery/plugin-sdk/v2 v2.7.0 @@ -31,6 +31,9 @@ replace github.com/apache/arrow/go/v13 => github.com/cloudquery/arrow/go/v13 v13 replace github.com/cloudquery/plugin-pb-go => ../plugin-pb-go require ( + github.com/andybalholm/brotli v1.0.5 // indirect + github.com/apache/thrift v0.16.0 // indirect + github.com/cloudquery/plugin-sdk/v3 v3.7.0 // indirect github.com/davecgh/go-spew v1.1.1 // indirect github.com/ghodss/yaml v1.0.0 // indirect github.com/golang/protobuf v1.5.3 // indirect diff --git a/go.sum b/go.sum index 8f7dfaf7d1..17a7a98de3 100644 --- a/go.sum +++ b/go.sum @@ -44,6 +44,8 @@ github.com/cloudquery/arrow/go/v13 v13.0.0-20230509053643-898a79b1d3c8 h1:CmgLSE github.com/cloudquery/arrow/go/v13 v13.0.0-20230509053643-898a79b1d3c8/go.mod h1:/XatdE3kDIBqZKhZ7OBUHwP2jaASDFZHqF4puOWM8po= github.com/cloudquery/plugin-sdk/v2 v2.7.0 h1:hRXsdEiaOxJtsn/wZMFQC9/jPfU1MeMK3KF+gPGqm7U= github.com/cloudquery/plugin-sdk/v2 v2.7.0/go.mod h1:pAX6ojIW99b/Vg4CkhnsGkRIzNaVEceYMR+Bdit73ug= +github.com/cloudquery/plugin-sdk/v3 v3.7.0 h1:aRazh17V+6AA00vmxPZRv2rudNEerSd3kqbyffRl6SA= +github.com/cloudquery/plugin-sdk/v3 v3.7.0/go.mod h1:z9Fny7SO8fNyVx6bOTM037lo7h3vJI+ZHUc/RMj20VU= github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= github.com/cncf/udpa/go v0.0.0-20201120205902-5459f2c99403/go.mod h1:WmhPx2Nbnhtbo57+VJT5O0JRkEi1Wbu0z5j0R8u5Hbk= github.com/coreos/go-systemd v0.0.0-20190321100706-95778dfbb74e/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= diff --git a/internal/memdb/memdb.go b/internal/memdb/memdb.go index 9c6bbb74d1..c84c32255e 100644 --- a/internal/memdb/memdb.go +++ b/internal/memdb/memdb.go @@ -10,15 +10,15 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/array" - "github.com/cloudquery/plugin-pb-go/specs" - "github.com/cloudquery/plugin-sdk/v3/plugins/destination" - "github.com/cloudquery/plugin-sdk/v3/schema" + pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" + "github.com/cloudquery/plugin-sdk/v4/plugin" + "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/rs/zerolog" ) // client is mostly used for testing the destination plugin. type client struct { - spec specs.Destination + spec pbPlugin.Spec memoryDB map[string][]arrow.Record tables map[string]*schema.Table memoryDBLock sync.RWMutex @@ -40,7 +40,7 @@ func WithBlockingWrite() Option { } } -func GetNewClient(options ...Option) destination.NewClientFunc { +func GetNewClient(options ...Option) plugin.NewClientFunc { c := &client{ memoryDB: make(map[string][]arrow.Record), memoryDBLock: sync.RWMutex{}, @@ -48,7 +48,7 @@ func GetNewClient(options ...Option) destination.NewClientFunc { for _, opt := range options { opt(c) } - return func(context.Context, zerolog.Logger, specs.Destination) (destination.Client, error) { + return func(context.Context, zerolog.Logger, pbPlugin.Spec) (plugin.Client, error) { return c, nil } } @@ -61,7 +61,7 @@ func getTestLogger(t *testing.T) zerolog.Logger { ).Level(zerolog.DebugLevel).With().Timestamp().Logger() } -func NewClient(_ context.Context, _ zerolog.Logger, spec specs.Destination) (destination.Client, error) { +func NewClient(_ context.Context, _ zerolog.Logger, spec pbPlugin.Spec) (plugin.Client, error) { return &client{ memoryDB: make(map[string][]arrow.Record), tables: make(map[string]*schema.Table), @@ -69,7 +69,7 @@ func NewClient(_ context.Context, _ zerolog.Logger, spec specs.Destination) (des }, nil } -func NewClientErrOnNew(context.Context, zerolog.Logger, specs.Destination) (destination.Client, error) { +func NewClientErrOnNew(context.Context, zerolog.Logger, pbPlugin.Spec) (plugin.Client, error) { return nil, fmt.Errorf("newTestDestinationMemDBClientErrOnNew") } @@ -94,6 +94,21 @@ func (c *client) overwrite(table *schema.Table, data arrow.Record) { c.memoryDB[tableName] = append(c.memoryDB[tableName], data) } +func (c *client) ID() string { + return "testDestinationMemDB" +} + +func (c *client) Sync(ctx context.Context, metrics *plugin.Metrics, res chan<- arrow.Record) error { + c.memoryDBLock.RLock() + for tableName := range c.memoryDB { + for _, row := range c.memoryDB[tableName] { + res <- row + } + } + c.memoryDBLock.RUnlock() + return nil +} + func (c *client) Migrate(_ context.Context, tables schema.Tables) error { for _, table := range tables { tableName := table.Name @@ -160,7 +175,7 @@ func (c *client) Write(ctx context.Context, _ schema.Tables, resources <-chan ar return fmt.Errorf("table name not found in schema metadata") } table := c.tables[tableName] - if c.spec.WriteMode == specs.WriteModeAppend { + if c.spec.WriteSpec.WriteMode == pbPlugin.WRITE_MODE_WRITE_MODE_APPEND { c.memoryDB[tableName] = append(c.memoryDB[tableName], resource) } else { c.overwrite(table, resource) @@ -184,7 +199,7 @@ func (c *client) WriteTableBatch(ctx context.Context, table *schema.Table, resou tableName := table.Name for _, resource := range resources { c.memoryDBLock.Lock() - if c.spec.WriteMode == specs.WriteModeAppend { + if c.spec.WriteSpec.WriteMode == pbPlugin.WRITE_MODE_WRITE_MODE_APPEND { c.memoryDB[tableName] = append(c.memoryDB[tableName], resource) } else { c.overwrite(table, resource) @@ -194,8 +209,8 @@ func (c *client) WriteTableBatch(ctx context.Context, table *schema.Table, resou return nil } -func (*client) Metrics() destination.Metrics { - return destination.Metrics{} +func (*client) Metrics() plugin.Metrics { + return plugin.Metrics{} } func (c *client) Close(context.Context) error { @@ -224,4 +239,4 @@ func (c *client) deleteStaleTable(_ context.Context, table *schema.Table, source } } c.memoryDB[tableName] = filteredTable -} +} \ No newline at end of file diff --git a/internal/memdb/memdb_test.go b/internal/memdb/memdb_test.go index 7f9e8a5759..b5196d45f3 100644 --- a/internal/memdb/memdb_test.go +++ b/internal/memdb/memdb_test.go @@ -6,38 +6,38 @@ import ( "time" "github.com/apache/arrow/go/v13/arrow" - "github.com/cloudquery/plugin-pb-go/specs" - "github.com/cloudquery/plugin-sdk/v3/plugins/destination" - "github.com/cloudquery/plugin-sdk/v3/schema" + pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" + "github.com/cloudquery/plugin-sdk/v4/plugin" + "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/google/uuid" "github.com/rs/zerolog" "github.com/stretchr/testify/require" ) -var migrateStrategyOverwrite = destination.MigrateStrategy{ - AddColumn: specs.MigrateModeForced, - AddColumnNotNull: specs.MigrateModeForced, - RemoveColumn: specs.MigrateModeForced, - RemoveColumnNotNull: specs.MigrateModeForced, - ChangeColumn: specs.MigrateModeForced, +var migrateStrategyOverwrite = plugin.MigrateStrategy{ + AddColumn: pbPlugin.WriteSpec_FORCE, + AddColumnNotNull: pbPlugin.WriteSpec_FORCE, + RemoveColumn: pbPlugin.WriteSpec_FORCE, + RemoveColumnNotNull: pbPlugin.WriteSpec_FORCE, + ChangeColumn: pbPlugin.WriteSpec_FORCE, } -var migrateStrategyAppend = destination.MigrateStrategy{ - AddColumn: specs.MigrateModeForced, - AddColumnNotNull: specs.MigrateModeForced, - RemoveColumn: specs.MigrateModeForced, - RemoveColumnNotNull: specs.MigrateModeForced, - ChangeColumn: specs.MigrateModeForced, +var migrateStrategyAppend = plugin.MigrateStrategy{ + AddColumn: pbPlugin.WriteSpec_FORCE, + AddColumnNotNull: pbPlugin.WriteSpec_FORCE, + RemoveColumn: pbPlugin.WriteSpec_FORCE, + RemoveColumnNotNull: pbPlugin.WriteSpec_FORCE, + ChangeColumn: pbPlugin.WriteSpec_FORCE, } func TestPluginUnmanagedClient(t *testing.T) { - destination.PluginTestSuiteRunner( + plugin.PluginTestSuiteRunner( t, - func() *destination.Plugin { - return destination.NewPlugin("test", "development", NewClient) + func() *plugin.Plugin { + return plugin.NewPlugin("test", "development", NewClient) }, - specs.Destination{}, - destination.PluginTestSuiteTests{ + pbPlugin.Spec{}, + plugin.PluginTestSuiteTests{ MigrateStrategyOverwrite: migrateStrategyOverwrite, MigrateStrategyAppend: migrateStrategyAppend, }, @@ -45,51 +45,55 @@ func TestPluginUnmanagedClient(t *testing.T) { } func TestPluginManagedClient(t *testing.T) { - destination.PluginTestSuiteRunner(t, - func() *destination.Plugin { - return destination.NewPlugin("test", "development", NewClient, destination.WithManagedWriter()) + plugin.PluginTestSuiteRunner(t, + func() *plugin.Plugin { + return plugin.NewPlugin("test", "development", NewClient, plugin.WithManagedWriter()) }, - specs.Destination{}, - destination.PluginTestSuiteTests{ + pbPlugin.Spec{}, + plugin.PluginTestSuiteTests{ MigrateStrategyOverwrite: migrateStrategyOverwrite, MigrateStrategyAppend: migrateStrategyAppend, }) } func TestPluginManagedClientWithSmallBatchSize(t *testing.T) { - destination.PluginTestSuiteRunner(t, - func() *destination.Plugin { - return destination.NewPlugin("test", "development", NewClient, destination.WithManagedWriter(), - destination.WithDefaultBatchSize(1), - destination.WithDefaultBatchSizeBytes(1)) - }, specs.Destination{}, - destination.PluginTestSuiteTests{ + plugin.PluginTestSuiteRunner(t, + func() *plugin.Plugin { + return plugin.NewPlugin("test", "development", NewClient, plugin.WithManagedWriter(), + plugin.WithDefaultBatchSize(1), + plugin.WithDefaultBatchSizeBytes(1)) + }, pbPlugin.Spec{}, + plugin.PluginTestSuiteTests{ MigrateStrategyOverwrite: migrateStrategyOverwrite, MigrateStrategyAppend: migrateStrategyAppend, }) } func TestPluginManagedClientWithLargeBatchSize(t *testing.T) { - destination.PluginTestSuiteRunner(t, - func() *destination.Plugin { - return destination.NewPlugin("test", "development", NewClient, destination.WithManagedWriter(), - destination.WithDefaultBatchSize(100000000), - destination.WithDefaultBatchSizeBytes(100000000)) + plugin.PluginTestSuiteRunner(t, + func() *plugin.Plugin { + return plugin.NewPlugin("test", "development", NewClient, plugin.WithManagedWriter(), + plugin.WithDefaultBatchSize(100000000), + plugin.WithDefaultBatchSizeBytes(100000000)) }, - specs.Destination{}, - destination.PluginTestSuiteTests{ + pbPlugin.Spec{}, + plugin.PluginTestSuiteTests{ MigrateStrategyOverwrite: migrateStrategyOverwrite, MigrateStrategyAppend: migrateStrategyAppend, }) } func TestPluginManagedClientWithCQPKs(t *testing.T) { - destination.PluginTestSuiteRunner(t, - func() *destination.Plugin { - return destination.NewPlugin("test", "development", NewClient) + plugin.PluginTestSuiteRunner(t, + func() *plugin.Plugin { + return plugin.NewPlugin("test", "development", NewClient) }, - specs.Destination{PKMode: specs.PKModeCQID}, - destination.PluginTestSuiteTests{ + pbPlugin.Spec{ + WriteSpec: &pbPlugin.WriteSpec{ + PkMode: pbPlugin.WriteSpec_CQ_ID_ONLY, + }, + }, + plugin.PluginTestSuiteTests{ MigrateStrategyOverwrite: migrateStrategyOverwrite, MigrateStrategyAppend: migrateStrategyAppend, }) @@ -97,8 +101,8 @@ func TestPluginManagedClientWithCQPKs(t *testing.T) { func TestPluginOnNewError(t *testing.T) { ctx := context.Background() - p := destination.NewPlugin("test", "development", NewClientErrOnNew) - err := p.Init(ctx, getTestLogger(t), specs.Destination{}) + p := plugin.NewPlugin("test", "development", NewClientErrOnNew) + err := p.Init(ctx, pbPlugin.Spec{}) if err == nil { t.Fatal("expected error") @@ -108,8 +112,8 @@ func TestPluginOnNewError(t *testing.T) { func TestOnWriteError(t *testing.T) { ctx := context.Background() newClientFunc := GetNewClient(WithErrOnWrite()) - p := destination.NewPlugin("test", "development", newClientFunc) - if err := p.Init(ctx, getTestLogger(t), specs.Destination{}); err != nil { + p := plugin.NewPlugin("test", "development", newClientFunc) + if err := p.Init(ctx, pbPlugin.Spec{}); err != nil { t.Fatal(err) } table := schema.TestTable("test", schema.TestSourceOptions{}) @@ -118,7 +122,7 @@ func TestOnWriteError(t *testing.T) { } sourceName := "TestDestinationOnWriteError" syncTime := time.Now() - sourceSpec := specs.Source{ + sourceSpec := pbPlugin.Spec{ Name: sourceName, } ch := make(chan arrow.Record, 1) @@ -143,8 +147,8 @@ func TestOnWriteError(t *testing.T) { func TestOnWriteCtxCancelled(t *testing.T) { ctx := context.Background() newClientFunc := GetNewClient(WithBlockingWrite()) - p := destination.NewPlugin("test", "development", newClientFunc) - if err := p.Init(ctx, getTestLogger(t), specs.Destination{}); err != nil { + p := plugin.NewPlugin("test", "development", newClientFunc) + if err := p.Init(ctx, pbPlugin.Spec{}); err != nil { t.Fatal(err) } table := schema.TestTable("test", schema.TestSourceOptions{}) @@ -153,7 +157,7 @@ func TestOnWriteCtxCancelled(t *testing.T) { } sourceName := "TestDestinationOnWriteError" syncTime := time.Now() - sourceSpec := specs.Source{ + sourceSpec := pbPlugin.Spec{ Name: sourceName, } ch := make(chan arrow.Record, 1) @@ -180,22 +184,22 @@ func TestPluginInit(t *testing.T) { ) var ( - batchSizeObserved int - batchSizeBytesObserved int + batchSizeObserved uint64 + batchSizeBytesObserved uint64 ) - p := destination.NewPlugin( + p := plugin.NewPlugin( "test", "development", - func(ctx context.Context, logger zerolog.Logger, s specs.Destination) (destination.Client, error) { - batchSizeObserved = s.BatchSize - batchSizeBytesObserved = s.BatchSizeBytes + func(ctx context.Context, logger zerolog.Logger, s pbPlugin.Spec) (plugin.Client, error) { + batchSizeObserved = s.WriteSpec.BatchSize + batchSizeBytesObserved = s.WriteSpec.BatchSizeBytes return NewClient(ctx, logger, s) }, - destination.WithDefaultBatchSize(batchSize), - destination.WithDefaultBatchSizeBytes(batchSizeBytes), + plugin.WithDefaultBatchSize(batchSize), + plugin.WithDefaultBatchSizeBytes(batchSizeBytes), ) - require.NoError(t, p.Init(context.TODO(), getTestLogger(t), specs.Destination{})) + require.NoError(t, p.Init(context.TODO(), pbPlugin.Spec{})) require.Equal(t, batchSize, batchSizeObserved) require.Equal(t, batchSizeBytes, batchSizeBytesObserved) -} +} \ No newline at end of file diff --git a/internal/pk/pk.go b/internal/pk/pk.go index 22b2b277db..ca8c5f2806 100644 --- a/internal/pk/pk.go +++ b/internal/pk/pk.go @@ -4,7 +4,7 @@ import ( "strings" "github.com/apache/arrow/go/v13/arrow" - "github.com/cloudquery/plugin-sdk/v3/schema" + "github.com/cloudquery/plugin-sdk/v4/schema" ) func String(resource arrow.Record) string { diff --git a/internal/servers/destination/v0/destinations.go b/internal/servers/destination/v0/destinations.go index c09b242e4c..c315bd0652 100644 --- a/internal/servers/destination/v0/destinations.go +++ b/internal/servers/destination/v0/destinations.go @@ -12,8 +12,8 @@ import ( pb "github.com/cloudquery/plugin-pb-go/pb/destination/v0" "github.com/cloudquery/plugin-pb-go/specs" schemav2 "github.com/cloudquery/plugin-sdk/v2/schema" - "github.com/cloudquery/plugin-sdk/v3/plugins/destination" - "github.com/cloudquery/plugin-sdk/v3/schema" + "github.com/cloudquery/plugin-sdk/v4/plugin" + "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/rs/zerolog" "golang.org/x/sync/errgroup" "google.golang.org/grpc/codes" @@ -22,7 +22,7 @@ import ( type Server struct { pb.UnimplementedDestinationServer - Plugin *destination.Plugin + Plugin *plugin.Plugin Logger zerolog.Logger spec specs.Destination } @@ -39,7 +39,8 @@ func (s *Server) Configure(ctx context.Context, req *pbBase.Configure_Request) ( return nil, status.Errorf(codes.InvalidArgument, "failed to unmarshal spec: %v", err) } s.spec = spec - return &pbBase.Configure_Response{}, s.Plugin.Init(ctx, s.Logger, spec) + specV3 := SpecV1ToV3(spec) + return &pbBase.Configure_Response{}, s.Plugin.Init(ctx, specV3) } func (s *Server) GetName(context.Context, *pbBase.GetName_Request) (*pbBase.GetName_Response, error) { @@ -102,8 +103,9 @@ func (s *Server) Write2(msg pb.Destination_Write2Server) error { SetDestinationManagedCqColumns(tables) s.setPKsForTables(tables) eg, ctx := errgroup.WithContext(msg.Context()) + sourceSpecV3 := SourceSpecV1ToV3(sourceSpec) eg.Go(func() error { - return s.Plugin.Write(ctx, sourceSpec, tables, syncTime, resources) + return s.Plugin.Write(ctx, sourceSpecV3, tables, syncTime, resources) }) sourceColumn := &schemav2.Text{} _ = sourceColumn.Set(sourceSpec.Name) diff --git a/internal/servers/destination/v0/schemav2tov3.go b/internal/servers/destination/v0/schemav2tov3.go index eabd37fd94..3b63448b15 100644 --- a/internal/servers/destination/v0/schemav2tov3.go +++ b/internal/servers/destination/v0/schemav2tov3.go @@ -8,8 +8,8 @@ import ( "github.com/apache/arrow/go/v13/arrow/array" "github.com/apache/arrow/go/v13/arrow/memory" schemav2 "github.com/cloudquery/plugin-sdk/v2/schema" - "github.com/cloudquery/plugin-sdk/v3/schema" - "github.com/cloudquery/plugin-sdk/v3/types" + "github.com/cloudquery/plugin-sdk/v4/schema" + "github.com/cloudquery/plugin-sdk/v4/types" ) func TablesV2ToV3(tables schemav2.Tables) schema.Tables { diff --git a/internal/servers/destination/v0/specv3tov1.go b/internal/servers/destination/v0/specv3tov1.go new file mode 100644 index 0000000000..31ab4fb5de --- /dev/null +++ b/internal/servers/destination/v0/specv3tov1.go @@ -0,0 +1,77 @@ +package destination + +import ( + pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" + "github.com/cloudquery/plugin-pb-go/specs" +) + +func SourceSpecV1ToV3(spec specs.Source) pbPlugin.Spec { + newSpec := pbPlugin.Spec{ + Name: spec.Name, + Version: spec.Version, + Path: spec.Path, + SyncSpec: &pbPlugin.SyncSpec{ + Tables: spec.Tables, + SkipTables: spec.SkipTables, + Destinations: spec.Destinations, + Concurrency: uint64(spec.Concurrency), + DetrministicCqId: spec.DeterministicCQID, + }, + } + switch spec.Scheduler { + case specs.SchedulerDFS: + newSpec.SyncSpec.Scheduler = pbPlugin.SyncSpec_SCHEDULER_DFS + case specs.SchedulerRoundRobin: + newSpec.SyncSpec.Scheduler = pbPlugin.SyncSpec_SCHEDULER_ROUND_ROBIN + default: + panic("invalid scheduler " + spec.Scheduler.String()) + } + return newSpec +} + +func SpecV1ToV3(spec specs.Destination) pbPlugin.Spec { + newSpec := pbPlugin.Spec{ + Name: spec.Name, + Version: spec.Version, + Path: spec.Path, + WriteSpec: &pbPlugin.WriteSpec{ + BatchSize: uint64(spec.BatchSize), + BatchSizeBytes: uint64(spec.BatchSizeBytes), + }, + } + switch spec.Registry { + case specs.RegistryGithub: + newSpec.Registry = pbPlugin.Spec_REGISTRY_GITHUB + case specs.RegistryGrpc: + newSpec.Registry = pbPlugin.Spec_REGISTRY_GRPC + case specs.RegistryLocal: + newSpec.Registry = pbPlugin.Spec_REGISTRY_LOCAL + default: + panic("invalid registry " + spec.Registry.String()) + } + switch spec.WriteMode { + case specs.WriteModeAppend: + newSpec.WriteSpec.WriteMode = pbPlugin.WRITE_MODE_WRITE_MODE_APPEND + case specs.WriteModeOverwrite: + newSpec.WriteSpec.WriteMode = pbPlugin.WRITE_MODE_WRITE_MODE_OVERWRITE + case specs.WriteModeOverwriteDeleteStale: + newSpec.WriteSpec.WriteMode = pbPlugin.WRITE_MODE_WRITE_MODE_OVERWRITE_DELETE_STALE + default: + panic("invalid write mode " + spec.WriteMode.String()) + } + switch spec.PKMode { + case specs.PKModeDefaultKeys: + newSpec.WriteSpec.PkMode = pbPlugin.WriteSpec_DEFAULT + case specs.PKModeCQID: + newSpec.WriteSpec.PkMode = pbPlugin.WriteSpec_CQ_ID_ONLY + } + switch spec.MigrateMode { + case specs.MigrateModeSafe: + newSpec.WriteSpec.MigrateMode = pbPlugin.WriteSpec_SAFE + case specs.MigrateModeForced: + newSpec.WriteSpec.MigrateMode = pbPlugin.WriteSpec_FORCE + default: + panic("invalid migrate mode " + spec.MigrateMode.String()) + } + return newSpec +} \ No newline at end of file diff --git a/internal/servers/destination/v1/destinations.go b/internal/servers/destination/v1/destinations.go index 447c03b596..4748c3c947 100644 --- a/internal/servers/destination/v1/destinations.go +++ b/internal/servers/destination/v1/destinations.go @@ -11,8 +11,8 @@ import ( "github.com/apache/arrow/go/v13/arrow/ipc" pb "github.com/cloudquery/plugin-pb-go/pb/destination/v1" "github.com/cloudquery/plugin-pb-go/specs" - "github.com/cloudquery/plugin-sdk/v3/plugins/destination" - "github.com/cloudquery/plugin-sdk/v3/schema" + "github.com/cloudquery/plugin-sdk/v4/plugin" + "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/rs/zerolog" "golang.org/x/sync/errgroup" "google.golang.org/grpc/codes" @@ -21,7 +21,7 @@ import ( type Server struct { pb.UnimplementedDestinationServer - Plugin *destination.Plugin + Plugin *plugin.Plugin Logger zerolog.Logger spec specs.Destination } @@ -32,7 +32,8 @@ func (s *Server) Configure(ctx context.Context, req *pb.Configure_Request) (*pb. return nil, status.Errorf(codes.InvalidArgument, "failed to unmarshal spec: %v", err) } s.spec = spec - return &pb.Configure_Response{}, s.Plugin.Init(ctx, s.Logger, spec) + specV3 := SpecV1ToV3(spec) + return &pb.Configure_Response{}, s.Plugin.Init(ctx, specV3) } func (s *Server) GetName(context.Context, *pb.GetName_Request) (*pb.GetName_Response, error) { @@ -96,8 +97,9 @@ func (s *Server) Write(msg pb.Destination_WriteServer) error { syncTime := r.Timestamp.AsTime() s.setPKsForTables(tables) eg, ctx := errgroup.WithContext(msg.Context()) + sourceSpecV3 := SourceSpecV1ToV3(sourceSpec) eg.Go(func() error { - return s.Plugin.Write(ctx, sourceSpec, tables, syncTime, resources) + return s.Plugin.Write(ctx, sourceSpecV3, tables, syncTime, resources) }) for { diff --git a/internal/servers/destination/v1/specv3tov1.go b/internal/servers/destination/v1/specv3tov1.go new file mode 100644 index 0000000000..31ab4fb5de --- /dev/null +++ b/internal/servers/destination/v1/specv3tov1.go @@ -0,0 +1,77 @@ +package destination + +import ( + pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" + "github.com/cloudquery/plugin-pb-go/specs" +) + +func SourceSpecV1ToV3(spec specs.Source) pbPlugin.Spec { + newSpec := pbPlugin.Spec{ + Name: spec.Name, + Version: spec.Version, + Path: spec.Path, + SyncSpec: &pbPlugin.SyncSpec{ + Tables: spec.Tables, + SkipTables: spec.SkipTables, + Destinations: spec.Destinations, + Concurrency: uint64(spec.Concurrency), + DetrministicCqId: spec.DeterministicCQID, + }, + } + switch spec.Scheduler { + case specs.SchedulerDFS: + newSpec.SyncSpec.Scheduler = pbPlugin.SyncSpec_SCHEDULER_DFS + case specs.SchedulerRoundRobin: + newSpec.SyncSpec.Scheduler = pbPlugin.SyncSpec_SCHEDULER_ROUND_ROBIN + default: + panic("invalid scheduler " + spec.Scheduler.String()) + } + return newSpec +} + +func SpecV1ToV3(spec specs.Destination) pbPlugin.Spec { + newSpec := pbPlugin.Spec{ + Name: spec.Name, + Version: spec.Version, + Path: spec.Path, + WriteSpec: &pbPlugin.WriteSpec{ + BatchSize: uint64(spec.BatchSize), + BatchSizeBytes: uint64(spec.BatchSizeBytes), + }, + } + switch spec.Registry { + case specs.RegistryGithub: + newSpec.Registry = pbPlugin.Spec_REGISTRY_GITHUB + case specs.RegistryGrpc: + newSpec.Registry = pbPlugin.Spec_REGISTRY_GRPC + case specs.RegistryLocal: + newSpec.Registry = pbPlugin.Spec_REGISTRY_LOCAL + default: + panic("invalid registry " + spec.Registry.String()) + } + switch spec.WriteMode { + case specs.WriteModeAppend: + newSpec.WriteSpec.WriteMode = pbPlugin.WRITE_MODE_WRITE_MODE_APPEND + case specs.WriteModeOverwrite: + newSpec.WriteSpec.WriteMode = pbPlugin.WRITE_MODE_WRITE_MODE_OVERWRITE + case specs.WriteModeOverwriteDeleteStale: + newSpec.WriteSpec.WriteMode = pbPlugin.WRITE_MODE_WRITE_MODE_OVERWRITE_DELETE_STALE + default: + panic("invalid write mode " + spec.WriteMode.String()) + } + switch spec.PKMode { + case specs.PKModeDefaultKeys: + newSpec.WriteSpec.PkMode = pbPlugin.WriteSpec_DEFAULT + case specs.PKModeCQID: + newSpec.WriteSpec.PkMode = pbPlugin.WriteSpec_CQ_ID_ONLY + } + switch spec.MigrateMode { + case specs.MigrateModeSafe: + newSpec.WriteSpec.MigrateMode = pbPlugin.WriteSpec_SAFE + case specs.MigrateModeForced: + newSpec.WriteSpec.MigrateMode = pbPlugin.WriteSpec_FORCE + default: + panic("invalid migrate mode " + spec.MigrateMode.String()) + } + return newSpec +} \ No newline at end of file diff --git a/internal/servers/plugin/v0/plugin.go b/internal/servers/plugin/v3/plugin.go similarity index 87% rename from internal/servers/plugin/v0/plugin.go rename to internal/servers/plugin/v3/plugin.go index d00b16059c..8a117bee9f 100644 --- a/internal/servers/plugin/v0/plugin.go +++ b/internal/servers/plugin/v3/plugin.go @@ -12,14 +12,10 @@ import ( "path/filepath" "github.com/apache/arrow/go/v13/arrow" - "github.com/apache/arrow/go/v13/arrow/array" "github.com/apache/arrow/go/v13/arrow/ipc" - "github.com/apache/arrow/go/v13/arrow/memory" - pb "github.com/cloudquery/plugin-pb-go/pb/plugin/v0" - "github.com/cloudquery/plugin-sdk/v3/plugin" - "github.com/cloudquery/plugin-sdk/v3/plugins/source" - "github.com/cloudquery/plugin-sdk/v3/scalar" - "github.com/cloudquery/plugin-sdk/v3/schema" + pb "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" + "github.com/cloudquery/plugin-sdk/v4/plugin" + "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/getsentry/sentry-go" "github.com/rs/zerolog" "golang.org/x/sync/errgroup" @@ -34,7 +30,7 @@ type Server struct { pb.UnimplementedPluginServer Plugin *plugin.Plugin Logger zerolog.Logger - spec pb.Spec + spec pb.Spec } func (s *Server) GetStaticTables(context.Context, *pb.GetStaticTables_Request) (*pb.GetStaticTables_Response, error) { @@ -81,23 +77,19 @@ func (s *Server) Init(ctx context.Context, req *pb.Init_Request) (*pb.Init_Respo } func (s *Server) Sync(req *pb.Sync_Request, stream pb.Plugin_SyncServer) error { - resources := make(chan *schema.Resource) + records := make(chan arrow.Record) var syncErr error ctx := stream.Context() go func() { - defer close(resources) - err := s.Plugin.Sync(ctx, req.SyncTime.AsTime(), *req.SyncSpec, resources) + defer close(records) + err := s.Plugin.Sync(ctx, req.SyncTime.AsTime(), *req.SyncSpec, records) if err != nil { - syncErr = fmt.Errorf("failed to sync resources: %w", err) + syncErr = fmt.Errorf("failed to sync records: %w", err) } }() - for resource := range resources { - vector := resource.GetValues() - bldr := array.NewRecordBuilder(memory.DefaultAllocator, resource.Table.ToArrowSchema()) - scalar.AppendToRecordBuilder(bldr, vector) - rec := bldr.NewRecord() + for rec := range records { var buf bytes.Buffer w := ipc.NewWriter(&buf, ipc.WithSchema(rec.Schema())) @@ -111,9 +103,11 @@ func (s *Server) Sync(req *pb.Sync_Request, stream pb.Plugin_SyncServer) error { msg := &pb.Sync_Response{ Resource: buf.Bytes(), } - err := checkMessageSize(msg, resource) + err := checkMessageSize(msg, rec) if err != nil { - s.Logger.Warn().Str("table", resource.Table.Name). + sc := rec.Schema() + tName, _ := sc.Metadata().GetValue(schema.MetadataTableName) + s.Logger.Warn().Str("table", tName). Int("bytes", len(msg.String())). Msg("Row exceeding max bytes ignored") continue @@ -130,7 +124,7 @@ func (s *Server) GetMetrics(context.Context, *pb.GetMetrics_Request) (*pb.GetMet // Aggregate metrics before sending to keep response size small. // Temporary fix for https://github.com/cloudquery/cloudquery/issues/3962 m := s.Plugin.Metrics() - agg := &source.TableClientMetrics{} + agg := &plugin.TableClientMetrics{} for _, table := range m.TableClient { for _, tableClient := range table { agg.Resources += tableClient.Resources @@ -138,8 +132,8 @@ func (s *Server) GetMetrics(context.Context, *pb.GetMetrics_Request) (*pb.GetMet agg.Panics += tableClient.Panics } } - b, err := json.Marshal(&source.Metrics{ - TableClient: map[string]map[string]*source.TableClientMetrics{"": {"": agg}}, + b, err := json.Marshal(&plugin.Metrics{ + TableClient: map[string]map[string]*plugin.TableClientMetrics{"": {"": agg}}, }) if err != nil { return nil, fmt.Errorf("failed to marshal source metrics: %w", err) @@ -255,7 +249,7 @@ func (s *Server) GenDocs(req *pb.GenDocs_Request, srv pb.Plugin_GenDocsServer) e } if err := srv.Send(&pb.GenDocs_Response{ Filename: f.Name(), - Content: content, + Content: content, }); err != nil { return fmt.Errorf("failed to send file: %w", err) } @@ -263,12 +257,14 @@ func (s *Server) GenDocs(req *pb.GenDocs_Request, srv pb.Plugin_GenDocsServer) e return nil } -func checkMessageSize(msg proto.Message, resource *schema.Resource) error { +func checkMessageSize(msg proto.Message, record arrow.Record) error { size := proto.Size(msg) // log error to Sentry if row exceeds half of the max size if size > MaxMsgSize/2 { + sc := record.Schema() + tName, _ := sc.Metadata().GetValue(schema.MetadataTableName) sentry.WithScope(func(scope *sentry.Scope) { - scope.SetTag("table", resource.Table.Name) + scope.SetTag("table", tName) scope.SetExtra("bytes", size) sentry.CurrentHub().CaptureMessage("Large message detected") }) @@ -292,4 +288,4 @@ func setCQIDAsPrimaryKeysForTables(tables schema.Tables) { } setCQIDAsPrimaryKeysForTables(table.Relations) } -} \ No newline at end of file +} diff --git a/internal/servers/source/v2/source.go b/internal/servers/source/v2/source.go deleted file mode 100644 index a010fefef3..0000000000 --- a/internal/servers/source/v2/source.go +++ /dev/null @@ -1,173 +0,0 @@ -package source - -import ( - "bytes" - "context" - "encoding/json" - "errors" - "fmt" - - "github.com/apache/arrow/go/v13/arrow/array" - "github.com/apache/arrow/go/v13/arrow/ipc" - "github.com/apache/arrow/go/v13/arrow/memory" - pb "github.com/cloudquery/plugin-pb-go/pb/source/v2" - "github.com/cloudquery/plugin-pb-go/specs" - "github.com/cloudquery/plugin-sdk/v3/plugins/source" - "github.com/cloudquery/plugin-sdk/v3/scalar" - "github.com/cloudquery/plugin-sdk/v3/schema" - "github.com/getsentry/sentry-go" - "github.com/rs/zerolog" - "google.golang.org/grpc/codes" - "google.golang.org/grpc/status" - "google.golang.org/protobuf/proto" -) - -const MaxMsgSize = 100 * 1024 * 1024 // 100 MiB - -type Server struct { - pb.UnimplementedSourceServer - Plugin *source.Plugin - Logger zerolog.Logger -} - -func (s *Server) GetTables(context.Context, *pb.GetTables_Request) (*pb.GetTables_Response, error) { - tables := s.Plugin.Tables().ToArrowSchemas() - encoded, err := tables.Encode() - if err != nil { - return nil, fmt.Errorf("failed to encode tables: %w", err) - } - return &pb.GetTables_Response{ - Tables: encoded, - }, nil -} - -func (s *Server) GetDynamicTables(context.Context, *pb.GetDynamicTables_Request) (*pb.GetDynamicTables_Response, error) { - tables := s.Plugin.GetDynamicTables().ToArrowSchemas() - encoded, err := tables.Encode() - if err != nil { - return nil, fmt.Errorf("failed to encode tables: %w", err) - } - return &pb.GetDynamicTables_Response{ - Tables: encoded, - }, nil -} - -func (s *Server) GetName(context.Context, *pb.GetName_Request) (*pb.GetName_Response, error) { - return &pb.GetName_Response{ - Name: s.Plugin.Name(), - }, nil -} - -func (s *Server) GetVersion(context.Context, *pb.GetVersion_Request) (*pb.GetVersion_Response, error) { - return &pb.GetVersion_Response{ - Version: s.Plugin.Version(), - }, nil -} - -func (s *Server) Init(ctx context.Context, req *pb.Init_Request) (*pb.Init_Response, error) { - var spec specs.Source - dec := json.NewDecoder(bytes.NewReader(req.Spec)) - dec.UseNumber() - // TODO: warn about unknown fields - if err := dec.Decode(&spec); err != nil { - return nil, status.Errorf(codes.InvalidArgument, "failed to decode spec: %v", err) - } - - if err := s.Plugin.Init(ctx, spec); err != nil { - return nil, status.Errorf(codes.Internal, "failed to init plugin: %v", err) - } - return &pb.Init_Response{}, nil -} - -func (s *Server) Sync(req *pb.Sync_Request, stream pb.Source_SyncServer) error { - resources := make(chan *schema.Resource) - var syncErr error - ctx := stream.Context() - - go func() { - defer close(resources) - err := s.Plugin.Sync(ctx, req.SyncTime.AsTime(), resources) - if err != nil { - syncErr = fmt.Errorf("failed to sync resources: %w", err) - } - }() - - for resource := range resources { - vector := resource.GetValues() - bldr := array.NewRecordBuilder(memory.DefaultAllocator, resource.Table.ToArrowSchema()) - scalar.AppendToRecordBuilder(bldr, vector) - rec := bldr.NewRecord() - - var buf bytes.Buffer - w := ipc.NewWriter(&buf, ipc.WithSchema(rec.Schema())) - if err := w.Write(rec); err != nil { - return status.Errorf(codes.Internal, "failed to write record: %v", err) - } - if err := w.Close(); err != nil { - return status.Errorf(codes.Internal, "failed to close writer: %v", err) - } - - msg := &pb.Sync_Response{ - Resource: buf.Bytes(), - } - err := checkMessageSize(msg, resource) - if err != nil { - s.Logger.Warn().Str("table", resource.Table.Name). - Int("bytes", len(msg.String())). - Msg("Row exceeding max bytes ignored") - continue - } - if err := stream.Send(msg); err != nil { - return status.Errorf(codes.Internal, "failed to send resource: %v", err) - } - } - - return syncErr -} - -func (s *Server) GetMetrics(context.Context, *pb.GetMetrics_Request) (*pb.GetMetrics_Response, error) { - // Aggregate metrics before sending to keep response size small. - // Temporary fix for https://github.com/cloudquery/cloudquery/issues/3962 - m := s.Plugin.Metrics() - agg := &source.TableClientMetrics{} - for _, table := range m.TableClient { - for _, tableClient := range table { - agg.Resources += tableClient.Resources - agg.Errors += tableClient.Errors - agg.Panics += tableClient.Panics - } - } - b, err := json.Marshal(&source.Metrics{ - TableClient: map[string]map[string]*source.TableClientMetrics{"": {"": agg}}, - }) - if err != nil { - return nil, fmt.Errorf("failed to marshal source metrics: %w", err) - } - return &pb.GetMetrics_Response{ - Metrics: b, - }, nil -} - -func (s *Server) GenDocs(_ context.Context, req *pb.GenDocs_Request) (*pb.GenDocs_Response, error) { - err := s.Plugin.GeneratePluginDocs(req.Path, req.Format.String()) - if err != nil { - return nil, fmt.Errorf("failed to generate docs: %w", err) - } - return &pb.GenDocs_Response{}, nil -} - -func checkMessageSize(msg proto.Message, resource *schema.Resource) error { - size := proto.Size(msg) - // log error to Sentry if row exceeds half of the max size - if size > MaxMsgSize/2 { - sentry.WithScope(func(scope *sentry.Scope) { - scope.SetTag("table", resource.Table.Name) - scope.SetExtra("bytes", size) - sentry.CurrentHub().CaptureMessage("Large message detected") - }) - } - if size > MaxMsgSize { - return errors.New("message exceeds max size") - } - return nil -} diff --git a/plugin/benchmark_test.go b/plugin/benchmark_test.go.backup similarity index 99% rename from plugin/benchmark_test.go rename to plugin/benchmark_test.go.backup index 36a86cd3cd..a1bf87d5a8 100644 --- a/plugin/benchmark_test.go +++ b/plugin/benchmark_test.go.backup @@ -11,7 +11,7 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/cloudquery/plugin-pb-go/specs" - "github.com/cloudquery/plugin-sdk/v3/schema" + "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/rs/zerolog" "golang.org/x/sync/errgroup" ) diff --git a/plugins/destination/diff.go b/plugin/diff.go similarity index 100% rename from plugins/destination/diff.go rename to plugin/diff.go diff --git a/plugin/docs.go b/plugin/docs.go index 5827e5edcf..e66bf7ebb2 100644 --- a/plugin/docs.go +++ b/plugin/docs.go @@ -11,9 +11,9 @@ import ( "sort" "text/template" - pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v0" - "github.com/cloudquery/plugin-sdk/v3/caser" - "github.com/cloudquery/plugin-sdk/v3/schema" + pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" + "github.com/cloudquery/plugin-sdk/v4/caser" + "github.com/cloudquery/plugin-sdk/v4/schema" ) //go:embed templates/*.go.tpl diff --git a/plugin/docs_test.go b/plugin/docs_test.go.backup similarity index 97% rename from plugin/docs_test.go rename to plugin/docs_test.go.backup index 44e7b34afd..06f271f9fd 100644 --- a/plugin/docs_test.go +++ b/plugin/docs_test.go.backup @@ -9,8 +9,8 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/bradleyjkemp/cupaloy/v2" - "github.com/cloudquery/plugin-sdk/v3/schema" - "github.com/cloudquery/plugin-sdk/v3/types" + "github.com/cloudquery/plugin-sdk/v4/schema" + "github.com/cloudquery/plugin-sdk/v4/types" "github.com/stretchr/testify/require" ) diff --git a/plugins/destination/managed_writer.go b/plugin/managed_writer.go similarity index 92% rename from plugins/destination/managed_writer.go rename to plugin/managed_writer.go index 0d00f14bc3..74092f785e 100644 --- a/plugins/destination/managed_writer.go +++ b/plugin/managed_writer.go @@ -1,17 +1,16 @@ -package destination +package plugin import ( "context" "fmt" "sync" - "sync/atomic" "time" "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/util" "github.com/cloudquery/plugin-pb-go/specs" - "github.com/cloudquery/plugin-sdk/v3/internal/pk" - "github.com/cloudquery/plugin-sdk/v3/schema" + "github.com/cloudquery/plugin-sdk/v4/internal/pk" + "github.com/cloudquery/plugin-sdk/v4/schema" ) type worker struct { @@ -33,7 +32,7 @@ func (p *Plugin) worker(ctx context.Context, metrics *Metrics, table *schema.Tab } return } - if len(resources) == p.spec.BatchSize || sizeBytes+util.TotalRecordSize(r) > int64(p.spec.BatchSizeBytes) { + if uint64(len(resources)) == p.spec.WriteSpec.BatchSize || sizeBytes+util.TotalRecordSize(r) > int64(p.spec.WriteSpec.BatchSizeBytes) { p.flush(ctx, metrics, table, resources) resources = resources[:0] // allows for mem reuse sizeBytes = 0 @@ -67,10 +66,10 @@ func (p *Plugin) flush(ctx context.Context, metrics *Metrics, table *schema.Tabl if err := p.client.WriteTableBatch(ctx, table, resources); err != nil { p.logger.Err(err).Str("table", table.Name).Int("len", batchSize).Dur("duration", time.Since(start)).Msg("failed to write batch") // we don't return an error as we need to continue until channel is closed otherwise there will be a deadlock - atomic.AddUint64(&metrics.Errors, uint64(batchSize)) + // atomic.AddUint64(&metrics.Errors, uint64(batchSize)) } else { p.logger.Info().Str("table", table.Name).Int("len", batchSize).Dur("duration", time.Since(start)).Msg("batch written successfully") - atomic.AddUint64(&metrics.Writes, uint64(batchSize)) + // atomic.AddUint64(&metrics.Writes, uint64(batchSize)) } } @@ -166,4 +165,4 @@ func (p *Plugin) writeManagedTableBatch(ctx context.Context, _ specs.Source, tab } p.workersLock.Unlock() return nil -} +} \ No newline at end of file diff --git a/plugin/metrics.go b/plugin/metrics.go index 182bc243a4..8ba88823b9 100644 --- a/plugin/metrics.go +++ b/plugin/metrics.go @@ -4,7 +4,7 @@ import ( "sync/atomic" "time" - "github.com/cloudquery/plugin-sdk/v3/schema" + "github.com/cloudquery/plugin-sdk/v4/schema" ) type Metrics struct { diff --git a/plugins/destination/nulls.go b/plugin/nulls.go similarity index 94% rename from plugins/destination/nulls.go rename to plugin/nulls.go index 6f965106e4..12ad0facf7 100644 --- a/plugins/destination/nulls.go +++ b/plugin/nulls.go @@ -1,4 +1,4 @@ -package destination +package plugin import ( "github.com/apache/arrow/go/v13/arrow" @@ -69,4 +69,8 @@ func (f AllowNullFunc) replaceNullsByEmpty(records []arrow.Record) { } records[i] = array.NewRecord(records[i].Schema(), cols, records[i].NumRows()) } +<<<<<<< HEAD:plugins/destination/nulls.go } +======= +} +>>>>>>> 5ba1713 (wip):plugin/nulls.go diff --git a/plugin/options.go b/plugin/options.go index 1290b7cd56..d3104875e7 100644 --- a/plugin/options.go +++ b/plugin/options.go @@ -2,16 +2,17 @@ package plugin import ( "context" + "time" - "github.com/cloudquery/plugin-sdk/v3/schema" + "github.com/cloudquery/plugin-sdk/v4/schema" ) type GetTables func(ctx context.Context, c Client) (schema.Tables, error) type Option func(*Plugin) -// WithDynamicTableOption allows the plugin to return list of tables after call to New -func WithDynamicTableOption(getDynamicTables GetTables) Option { +// WithDynamicTable allows the plugin to return list of tables after call to New +func WithDynamicTable(getDynamicTables GetTables) Option { return func(p *Plugin) { p.getDynamicTables = getDynamicTables } @@ -38,9 +39,33 @@ func WithTitleTransformer(t func(*schema.Table) string) Option { } } - func WithStaticTables(tables schema.Tables) Option { return func(p *Plugin) { p.staticTables = tables } +} + + +func WithManagedWriter() Option { + return func(p *Plugin) { + p.managedWriter = true + } +} + +func WithBatchTimeout(seconds int) Option { + return func(p *Plugin) { + p.batchTimeout = time.Duration(seconds) * time.Second + } +} + +func WithDefaultBatchSize(defaultBatchSize int) Option { + return func(p *Plugin) { + p.defaultBatchSize = defaultBatchSize + } +} + +func WithDefaultBatchSizeBytes(defaultBatchSizeBytes int) Option { + return func(p *Plugin) { + p.defaultBatchSizeBytes = defaultBatchSizeBytes + } } \ No newline at end of file diff --git a/plugin/plugin.go b/plugin/plugin.go index e1efa19cb1..e812cf941c 100644 --- a/plugin/plugin.go +++ b/plugin/plugin.go @@ -7,14 +7,17 @@ import ( "time" "github.com/apache/arrow/go/v13/arrow" + "github.com/apache/arrow/go/v13/arrow/array" + "github.com/apache/arrow/go/v13/arrow/memory" "github.com/cloudquery/plugin-pb-go/specs" - "github.com/cloudquery/plugin-sdk/v3/backend" - "github.com/cloudquery/plugin-sdk/v3/caser" - "github.com/cloudquery/plugin-sdk/v3/schema" + "github.com/cloudquery/plugin-sdk/v4/backend" + "github.com/cloudquery/plugin-sdk/v4/caser" + "github.com/cloudquery/plugin-sdk/v4/scalar" + "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/rs/zerolog" "golang.org/x/sync/semaphore" - pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v0" + pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" ) type Options struct { @@ -25,28 +28,40 @@ type NewExecutionClientFunc func(context.Context, zerolog.Logger, specs.Source, type NewClientFunc func(context.Context, zerolog.Logger, pbPlugin.Spec) (Client, error) -type UnmanagedClient interface { - schema.ClientMeta - Sync(ctx context.Context, metrics *Metrics, syncSpec pbPlugin.SyncSpec, res chan<- *schema.Resource) error -} - type Client interface { - Sync(ctx context.Context, metrics *Metrics, res chan<- *schema.Resource) error + ID() string + Sync(ctx context.Context, metrics *Metrics, res chan<- arrow.Record) error Migrate(ctx context.Context, tables schema.Tables) error + WriteTableBatch(ctx context.Context, table *schema.Table, data []arrow.Record) error Write(ctx context.Context, tables schema.Tables, res <-chan arrow.Record) error DeleteStale(ctx context.Context, tables schema.Tables, sourceName string, syncTime time.Time) error Read(ctx context.Context, table *schema.Table, sourceName string, res chan<- arrow.Record) error + Close(ctx context.Context) error } type UnimplementedWriter struct{} -func (UnimplementedWriter) WriteTableBatch(context.Context, *schema.Table, []arrow.Record) error { +func (UnimplementedWriter) Migrate(ctx context.Context, tables schema.Tables) error { + return fmt.Errorf("not implemented") +} + +func (UnimplementedWriter) Write(ctx context.Context, tables schema.Tables, res <-chan arrow.Record) error { + return fmt.Errorf("not implemented") +} + +func (UnimplementedWriter) DeleteStale(ctx context.Context, tables schema.Tables, sourceName string, syncTime time.Time) error { return fmt.Errorf("not implemented") } type UnimplementedSync struct{} -func (UnimplementedSync) Sync(ctx context.Context, metrics *Metrics, res chan<- *schema.Resource) error { +func (UnimplementedSync) Sync(ctx context.Context, metrics *Metrics, res chan<- arrow.Record) error { + return fmt.Errorf("not implemented") +} + +type UnimplementedRead struct{} + +func (UnimplementedRead) Read(ctx context.Context, table *schema.Table, sourceName string, res chan<- arrow.Record) error { return fmt.Errorf("not implemented") } @@ -94,6 +109,14 @@ type Plugin struct { // titleTransformer allows the plugin to control how table names get turned into titles for generated documentation titleTransformer func(*schema.Table) string syncTime time.Time + + managedWriter bool + workers map[string]*worker + workersLock *sync.Mutex + + batchTimeout time.Duration + defaultBatchSize int + defaultBatchSizeBytes int } const ( @@ -168,10 +191,11 @@ func NewPlugin(name string, version string, newClient NewClientFunc, options ... p := Plugin{ name: name, version: version, - internalColumns: true, - caser: caser.New(), - titleTransformer: DefaultTitleTransformer, - newClient: newClient, + internalColumns: true, + caser: caser.New(), + titleTransformer: DefaultTitleTransformer, + newClient: newClient, + metrics: &Metrics{TableClient: make(map[string]map[string]*TableClientMetrics)}, } for _, opt := range options { opt(&p) @@ -204,7 +228,6 @@ func (p *Plugin) Version() string { return p.version } - func (p *Plugin) SetLogger(logger zerolog.Logger) { p.logger = logger.With().Str("module", p.name+"-src").Logger() } @@ -222,6 +245,21 @@ func (p *Plugin) DynamicTables() schema.Tables { return p.sessionTables } +func (p *Plugin) readAll(ctx context.Context, table *schema.Table, sourceName string) ([]arrow.Record, error) { + var readErr error + ch := make(chan arrow.Record) + go func() { + defer close(ch) + readErr = p.Read(ctx, table, sourceName, ch) + }() + // nolint:prealloc + var resources []arrow.Record + for resource := range ch { + resources = append(resources, resource) + } + return resources, readErr +} + func (p *Plugin) Read(ctx context.Context, table *schema.Table, sourceName string, res chan<- arrow.Record) error { return p.client.Read(ctx, table, sourceName, res) } @@ -243,6 +281,43 @@ func (p *Plugin) Init(ctx context.Context, spec pbPlugin.Spec) error { } p.spec = spec + tables := p.staticTables + if p.getDynamicTables != nil { + tables, err = p.getDynamicTables(ctx, p.client) + if err != nil { + return fmt.Errorf("failed to get dynamic tables: %w", err) + } + + tables, err = tables.FilterDfs(spec.SyncSpec.Tables, spec.SyncSpec.SkipTables, true) + if err != nil { + return fmt.Errorf("failed to filter tables: %w", err) + } + if len(tables) == 0 { + return fmt.Errorf("no tables to sync - please check your spec 'tables' and 'skip_tables' settings") + } + + setParents(tables, nil) + if err := transformTables(tables); err != nil { + return err + } + if p.internalColumns { + if err := p.addInternalColumns(tables); err != nil { + return err + } + } + + p.maxDepth = maxDepth(tables) + if p.maxDepth > maxAllowedDepth { + return fmt.Errorf("max depth of tables is %d, max allowed is %d", p.maxDepth, maxAllowedDepth) + } + } else { + tables, err = tables.FilterDfs(spec.SyncSpec.Tables, spec.SyncSpec.SkipTables, true) + if err != nil { + return fmt.Errorf("failed to filter tables: %w", err) + } + } + p.sessionTables = tables + return nil } @@ -254,6 +329,41 @@ func (p *Plugin) writeUnmanaged(ctx context.Context, _ specs.Source, tables sche return p.client.Write(ctx, tables, res) } +// this function is currently used mostly for testing so it's not a public api +func (p *Plugin) writeOne(ctx context.Context, sourceSpec pbPlugin.Spec, syncTime time.Time, resource arrow.Record) error { + resources := []arrow.Record{resource} + return p.writeAll(ctx, sourceSpec, syncTime, resources) +} + +// this function is currently used mostly for testing so it's not a public api +func (p *Plugin) writeAll(ctx context.Context, sourceSpec pbPlugin.Spec, syncTime time.Time, resources []arrow.Record) error { + ch := make(chan arrow.Record, len(resources)) + for _, resource := range resources { + ch <- resource + } + close(ch) + tables := make(schema.Tables, 0) + tableNames := make(map[string]struct{}) + for _, resource := range resources { + sc := resource.Schema() + tableMD := sc.Metadata() + name, found := tableMD.GetValue(schema.MetadataTableName) + if !found { + return fmt.Errorf("missing table name") + } + if _, ok := tableNames[name]; ok { + continue + } + table, err := schema.NewTableFromArrowSchema(resource.Schema()) + if err != nil { + return err + } + tables = append(tables, table) + tableNames[table.Name] = struct{}{} + } + return p.Write(ctx, sourceSpec, tables, syncTime, ch) +} + func (p *Plugin) Write(ctx context.Context, sourceSpec pbPlugin.Spec, tables schema.Tables, syncTime time.Time, res <-chan arrow.Record) error { syncTime = syncTime.UTC() if err := p.client.Write(ctx, tables, res); err != nil { @@ -281,8 +391,23 @@ func (p *Plugin) DeleteStale(ctx context.Context, tables schema.Tables, sourceNa return p.client.DeleteStale(ctx, tables, sourceName, syncTime) } +func (p *Plugin) syncAll(ctx context.Context, syncTime time.Time, syncSpec pbPlugin.SyncSpec) ([]arrow.Record, error) { + var err error + ch := make(chan arrow.Record) + go func() { + defer close(ch) + err = p.Sync(ctx, syncTime, syncSpec, ch) + }() + // nolint:prealloc + var resources []arrow.Record + for resource := range ch { + resources = append(resources, resource) + } + return resources, err +} + // Sync is syncing data from the requested tables in spec to the given channel -func (p *Plugin) Sync(ctx context.Context, syncTime time.Time, syncSpec pbPlugin.SyncSpec, res chan<- *schema.Resource) error { +func (p *Plugin) Sync(ctx context.Context, syncTime time.Time, syncSpec pbPlugin.SyncSpec, res chan<- arrow.Record) error { if !p.mu.TryLock() { return fmt.Errorf("plugin already in use") } @@ -291,18 +416,28 @@ func (p *Plugin) Sync(ctx context.Context, syncTime time.Time, syncSpec pbPlugin startTime := time.Now() if p.unmanaged { - unmanagedClient := p.client.(UnmanagedClient) - if err := unmanagedClient.Sync(ctx, p.metrics, syncSpec, res); err != nil { + if err := p.client.Sync(ctx, p.metrics, res); err != nil { return fmt.Errorf("failed to sync unmanaged client: %w", err) } } else { - switch syncSpec.Scheduler { - case pbPlugin.SyncSpec_SCHEDULER_DFS: - p.syncDfs(ctx, syncSpec, p.client, p.sessionTables, res) - case pbPlugin.SyncSpec_SCHEDULER_ROUND_ROBIN: - p.syncRoundRobin(ctx, syncSpec, p.client, p.sessionTables, res) - default: - return fmt.Errorf("unknown scheduler %s. Options are: %v", syncSpec.Scheduler, specs.AllSchedulers.String()) + resources := make(chan *schema.Resource) + go func() { + defer close(resources) + switch syncSpec.Scheduler { + case pbPlugin.SyncSpec_SCHEDULER_DFS: + p.syncDfs(ctx, syncSpec, p.client, p.sessionTables, resources) + case pbPlugin.SyncSpec_SCHEDULER_ROUND_ROBIN: + p.syncRoundRobin(ctx, syncSpec, p.client, p.sessionTables, resources) + default: + panic(fmt.Errorf("unknown scheduler %s. Options are: %v", syncSpec.Scheduler, specs.AllSchedulers.String())) + } + }() + for resource := range resources { + vector := resource.GetValues() + bldr := array.NewRecordBuilder(memory.DefaultAllocator, resource.Table.ToArrowSchema()) + scalar.AppendToRecordBuilder(bldr, vector) + rec := bldr.NewRecord() + res <- rec } } @@ -322,5 +457,5 @@ func (p *Plugin) Close(ctx context.Context) error { } p.backend = nil } - return nil + return p.client.Close(ctx) } diff --git a/plugins/source/plugin_test.go b/plugin/plugin_managed_source_test.go similarity index 76% rename from plugins/source/plugin_test.go rename to plugin/plugin_managed_source_test.go index 08b38da24d..159c7dd9c8 100644 --- a/plugins/source/plugin_test.go +++ b/plugin/plugin_managed_source_test.go @@ -1,23 +1,24 @@ -package source +package plugin import ( "context" + "fmt" "testing" "time" "github.com/apache/arrow/go/v13/arrow" - "github.com/cloudquery/plugin-pb-go/specs" - "github.com/cloudquery/plugin-sdk/v3/scalar" - "github.com/cloudquery/plugin-sdk/v3/schema" - "github.com/cloudquery/plugin-sdk/v3/transformers" + "github.com/apache/arrow/go/v13/arrow/array" + pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v0" + "github.com/cloudquery/plugin-sdk/v4/scalar" + "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/google/go-cmp/cmp" "github.com/google/uuid" "github.com/rs/zerolog" - "github.com/stretchr/testify/assert" - "golang.org/x/sync/errgroup" ) -type testExecutionClient struct{} +type testExecutionClient struct { + UnimplementedWriter +} var _ schema.ClientMeta = &testExecutionClient{} @@ -137,7 +138,19 @@ func (*testExecutionClient) ID() string { return "testExecutionClient" } -func newTestExecutionClient(context.Context, zerolog.Logger, specs.Source, Options) (schema.ClientMeta, error) { +func (*testExecutionClient) Close(context.Context) error { + return nil +} + +func (*testExecutionClient) Read(ctx context.Context, table *schema.Table, sourceName string, res chan<- arrow.Record) error { + return fmt.Errorf("not implemented") +} + +func (*testExecutionClient) Sync(ctx context.Context, metrics *Metrics, res chan<- arrow.Record) error { + return fmt.Errorf("not implemented") +} + +func newTestExecutionClient(context.Context, zerolog.Logger, pbPlugin.Spec) (Client, error) { return &testExecutionClient{}, nil } @@ -345,18 +358,18 @@ func (testRand) Read(p []byte) (n int, err error) { func TestSync(t *testing.T) { uuid.SetRand(testRand{}) - for _, scheduler := range specs.AllSchedulers { + for _, scheduler := range pbPlugin.SyncSpec_SCHEDULER_value { for _, tc := range syncTestCases { tc := tc tc.table = tc.table.Copy(nil) - t.Run(tc.table.Name+"_"+scheduler.String(), func(t *testing.T) { - testSyncTable(t, tc, scheduler, tc.deterministicCQID) + t.Run(tc.table.Name+"_"+pbPlugin.SyncSpec_SCHEDULER(scheduler).String(), func(t *testing.T) { + testSyncTable(t, tc, pbPlugin.SyncSpec_SCHEDULER(scheduler), tc.deterministicCQID) }) } } } -func testSyncTable(t *testing.T, tc syncTestCase, scheduler specs.Scheduler, deterministicCQID bool) { +func testSyncTable(t *testing.T, tc syncTestCase, scheduler pbPlugin.SyncSpec_SCHEDULER, deterministicCQID bool) { ctx := context.Background() tables := []*schema.Table{ tc.table, @@ -365,43 +378,43 @@ func testSyncTable(t *testing.T, tc syncTestCase, scheduler specs.Scheduler, det plugin := NewPlugin( "testSourcePlugin", "1.0.0", - tables, newTestExecutionClient, + WithStaticTables(tables), ) plugin.SetLogger(zerolog.New(zerolog.NewTestWriter(t))) - spec := specs.Source{ - Name: "testSource", - Path: "cloudquery/testSource", - Tables: []string{"*"}, - Version: "v1.0.0", - Destinations: []string{"test"}, - Concurrency: 1, // choose a very low value to check that we don't run into deadlocks - Scheduler: scheduler, - DeterministicCQID: deterministicCQID, + spec := pbPlugin.Spec{ + Name: "testSource", + Path: "cloudquery/testSource", + Version: "v1.0.0", + SyncSpec: &pbPlugin.SyncSpec{ + Tables: []string{"*"}, + Destinations: []string{"test"}, + Concurrency: 1, // choose a very low value to check that we don't run into deadlocks + Scheduler: scheduler, + DetrministicCqId: deterministicCQID, + }, } if err := plugin.Init(ctx, spec); err != nil { t.Fatal(err) } - resources := make(chan *schema.Resource) - g, ctx := errgroup.WithContext(ctx) - g.Go(func() error { - defer close(resources) - return plugin.Sync(ctx, - testSyncTime, - resources) - }) + records, err := plugin.syncAll(ctx, testSyncTime, *spec.SyncSpec) + if err != nil { + t.Fatal(err) + } var i int - for resource := range resources { + for _, record := range records { if tc.data == nil { - t.Fatalf("Unexpected resource %v", resource) + t.Fatalf("Unexpected resource %v", record) } if i >= len(tc.data) { t.Fatalf("expected %d resources. got %d", len(tc.data), i) } - if !resource.GetValues().Equal(tc.data[i]) { - t.Fatalf("expected at i=%d: %v. got %v", i, tc.data[i], resource.GetValues()) + rec := tc.data[i].ToArrowRecord(record.Schema()) + if !array.RecordEqual(rec, record) { + t.Fatal(RecordDiff(rec, record)) + // t.Fatalf("expected at i=%d: %v. got %v", i, tc.data[i], record) } i++ } @@ -413,9 +426,6 @@ func testSyncTable(t *testing.T, tc syncTestCase, scheduler specs.Scheduler, det if !tc.stats.Equal(stats) { t.Fatalf("unexpected stats: %v", cmp.Diff(tc.stats, stats)) } - if err := g.Wait(); err != nil { - t.Fatal(err) - } } func TestIgnoredColumns(t *testing.T) { @@ -440,31 +450,31 @@ var testTable struct { Quaternary string } -func TestNewPluginPrimaryKeys(t *testing.T) { - testTransforms := []struct { - transformerOptions []transformers.StructTransformerOption - resultKeys []string - }{ - { - transformerOptions: []transformers.StructTransformerOption{transformers.WithPrimaryKeys("PrimaryKey")}, - resultKeys: []string{"primary_key"}, - }, - { - transformerOptions: []transformers.StructTransformerOption{}, - resultKeys: []string{"_cq_id"}, - }, - } - for _, tc := range testTransforms { - tables := []*schema.Table{ - { - Name: "test_table", - Transform: transformers.TransformWithStruct( - &testTable, tc.transformerOptions..., - ), - }, - } - - plugin := NewPlugin("testSourcePlugin", "1.0.0", tables, newTestExecutionClient) - assert.Equal(t, tc.resultKeys, plugin.tables[0].PrimaryKeys()) - } -} +// func TestNewPluginPrimaryKeys(t *testing.T) { +// testTransforms := []struct { +// transformerOptions []transformers.StructTransformerOption +// resultKeys []string +// }{ +// { +// transformerOptions: []transformers.StructTransformerOption{transformers.WithPrimaryKeys("PrimaryKey")}, +// resultKeys: []string{"primary_key"}, +// }, +// { +// transformerOptions: []transformers.StructTransformerOption{}, +// resultKeys: []string{"_cq_id"}, +// }, +// } +// for _, tc := range testTransforms { +// tables := []*schema.Table{ +// { +// Name: "test_table", +// Transform: transformers.TransformWithStruct( +// &testTable, tc.transformerOptions..., +// ), +// }, +// } + +// plugin := NewPlugin("testSourcePlugin", "1.0.0", tables, newTestExecutionClient) +// assert.Equal(t, tc.resultKeys, plugin.tables[0].PrimaryKeys()) +// } +// } diff --git a/plugin/plugin_round_robin_test.go b/plugin/plugin_round_robin_test.go index 9c4c094d6f..64b6472387 100644 --- a/plugin/plugin_round_robin_test.go +++ b/plugin/plugin_round_robin_test.go @@ -10,22 +10,33 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/array" pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v0" - "github.com/cloudquery/plugin-pb-go/specs" - "github.com/cloudquery/plugin-sdk/v3/schema" + "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/rs/zerolog" ) type testPluginClient struct { - memoryDB map[string][]arrow.Record - tables map[string]*schema.Table - memoryDBLock sync.RWMutex + memoryDB map[string][]arrow.Record + tables map[string]*schema.Table + spec pbPlugin.Spec + memoryDBLock sync.RWMutex } type testPluginSpec struct { ConnectionString string `json:"connection_string"` } -func (c *testPluginClient) Sync(ctx context.Context, metrics *Metrics, res chan<- *schema.Resource) error { +func (c *testPluginClient) ID() string { + return "test-plugin" +} + +func (c *testPluginClient) Sync(ctx context.Context, metrics *Metrics, res chan<- arrow.Record) error { + c.memoryDBLock.RLock() + for tableName := range c.memoryDB { + for _, row := range c.memoryDB[tableName] { + res <- row + } + } + c.memoryDBLock.RUnlock() return nil } @@ -48,7 +59,6 @@ func (c *testPluginClient) Migrate(ctx context.Context, tables schema.Tables) er c.tables[tableName] = table } return nil - return nil } func (c *testPluginClient) Write(ctx context.Context, tables schema.Tables, resources <-chan arrow.Record) error { @@ -60,7 +70,7 @@ func (c *testPluginClient) Write(ctx context.Context, tables schema.Tables, reso return fmt.Errorf("table name not found in schema metadata") } table := c.tables[tableName] - if c.spec.WriteMode == specs.WriteModeAppend { + if c.spec.WriteSpec.WriteMode == pbPlugin.WRITE_MODE_WRITE_MODE_APPEND { c.memoryDB[tableName] = append(c.memoryDB[tableName], resource) } else { c.overwrite(table, resource) @@ -108,6 +118,14 @@ func (c *testPluginClient) deleteStaleTable(_ context.Context, table *schema.Tab } func (c *testPluginClient) DeleteStale(ctx context.Context, tables schema.Tables, sourceName string, syncTime time.Time) error { + for _, table := range tables { + c.deleteStaleTable(ctx, table, sourceName, syncTime) + } + return nil +} + +func (c *testPluginClient) Close(ctx context.Context) error { + c.memoryDB = nil return nil } @@ -136,13 +154,77 @@ func (c *testPluginClient) Read(ctx context.Context, table *schema.Table, source return nil } -func NewTestPluginClient(context.Context, zerolog.Logger, pbPlugin.Spec) (Client, error) { +func NewTestPluginClient(ctx context.Context, logger zerolog.Logger, spec pbPlugin.Spec) (Client, error) { return &testPluginClient{ memoryDB: make(map[string][]arrow.Record), tables: make(map[string]*schema.Table), + spec: spec, }, nil } func TestPluginRoundRobin(t *testing.T) { - p := NewPlugin("test", "v0.0.0", NewTestPluginClient) + ctx := context.Background() + p := NewPlugin("test", "v0.0.0", NewTestPluginClient, WithUnmanaged()) + testTable := schema.TestTable("test_table", schema.TestSourceOptions{}) + syncTime := time.Now().UTC() + testRecords := schema.GenTestData(testTable, schema.GenTestDataOptions{ + SourceName: "test", + SyncTime: syncTime, + MaxRows: 1, + }) + spec := pbPlugin.Spec{ + Name: "test", + Path: "cloudquery/test", + Version: "v1.0.0", + Registry: pbPlugin.Spec_REGISTRY_GITHUB, + WriteSpec: &pbPlugin.WriteSpec{}, + SyncSpec: &pbPlugin.SyncSpec{}, + } + if err := p.Init(ctx, spec); err != nil { + t.Fatal(err) + } + + if err := p.Migrate(ctx, schema.Tables{testTable}); err != nil { + t.Fatal(err) + } + if err := p.writeAll(ctx, spec, syncTime, testRecords); err != nil { + t.Fatal(err) + } + gotRecords, err := p.readAll(ctx, testTable, "test") + if err != nil { + t.Fatal(err) + } + if len(gotRecords) != len(testRecords) { + t.Fatalf("got %d records, want %d", len(gotRecords), len(testRecords)) + } + if !array.RecordEqual(testRecords[0], gotRecords[0]) { + t.Fatal("records are not equal") + } + records, err := p.syncAll(ctx, syncTime, *spec.SyncSpec) + if err != nil { + t.Fatal(err) + } + if len(records) != 1 { + t.Fatalf("got %d resources, want 1", len(records)) + } + + if !array.RecordEqual(testRecords[0], records[0]) { + t.Fatal("records are not equal") + } + + newSyncTime := time.Now().UTC() + if err := p.DeleteStale(ctx, schema.Tables{testTable}, "test", newSyncTime); err != nil { + t.Fatal(err) + } + records, err = p.syncAll(ctx, syncTime, *spec.SyncSpec) + if err != nil { + t.Fatal(err) + } + if len(records) != 0 { + t.Fatalf("got %d resources, want 0", len(records)) + } + + if err := p.Close(ctx); err != nil { + t.Fatal(err) + } } \ No newline at end of file diff --git a/plugin/plugin_test.go b/plugin/plugin_test.go deleted file mode 100644 index 16afc7338c..0000000000 --- a/plugin/plugin_test.go +++ /dev/null @@ -1,470 +0,0 @@ -package plugin - -import ( - "context" - "testing" - "time" - - "github.com/apache/arrow/go/v13/arrow" - "github.com/cloudquery/plugin-pb-go/specs" - "github.com/cloudquery/plugin-sdk/v3/scalar" - "github.com/cloudquery/plugin-sdk/v3/schema" - "github.com/cloudquery/plugin-sdk/v3/transformers" - "github.com/google/go-cmp/cmp" - "github.com/google/uuid" - "github.com/rs/zerolog" - "github.com/stretchr/testify/assert" - "golang.org/x/sync/errgroup" -) - -type testExecutionClient struct{} - -var _ schema.ClientMeta = &testExecutionClient{} - -var deterministicStableUUID = uuid.MustParse("c25355aab52c5b70a4e0c9991f5a3b87") -var randomStableUUID = uuid.MustParse("00000000000040008000000000000000") - -var testSyncTime = time.Now() - -func testResolverSuccess(_ context.Context, _ schema.ClientMeta, _ *schema.Resource, res chan<- any) error { - res <- map[string]any{ - "TestColumn": 3, - } - return nil -} - -func testResolverPanic(context.Context, schema.ClientMeta, *schema.Resource, chan<- any) error { - panic("Resolver") -} - -func testPreResourceResolverPanic(context.Context, schema.ClientMeta, *schema.Resource) error { - panic("PreResourceResolver") -} - -func testColumnResolverPanic(context.Context, schema.ClientMeta, *schema.Resource, schema.Column) error { - panic("ColumnResolver") -} - -func testTableSuccess() *schema.Table { - return &schema.Table{ - Name: "test_table_success", - Resolver: testResolverSuccess, - Columns: []schema.Column{ - { - Name: "test_column", - Type: arrow.PrimitiveTypes.Int64, - }, - }, - } -} - -func testTableSuccessWithPK() *schema.Table { - return &schema.Table{ - Name: "test_table_success", - Resolver: testResolverSuccess, - Columns: []schema.Column{ - { - Name: "test_column", - Type: arrow.PrimitiveTypes.Int64, - PrimaryKey: true, - }, - }, - } -} - -func testTableResolverPanic() *schema.Table { - return &schema.Table{ - Name: "test_table_resolver_panic", - Resolver: testResolverPanic, - Columns: []schema.Column{ - { - Name: "test_column", - Type: arrow.PrimitiveTypes.Int64, - }, - }, - } -} - -func testTablePreResourceResolverPanic() *schema.Table { - return &schema.Table{ - Name: "test_table_pre_resource_resolver_panic", - PreResourceResolver: testPreResourceResolverPanic, - Resolver: testResolverSuccess, - Columns: []schema.Column{ - { - Name: "test_column", - Type: arrow.PrimitiveTypes.Int64, - }, - }, - } -} - -func testTableColumnResolverPanic() *schema.Table { - return &schema.Table{ - Name: "test_table_column_resolver_panic", - Resolver: testResolverSuccess, - Columns: []schema.Column{ - { - Name: "test_column", - Type: arrow.PrimitiveTypes.Int64, - }, - { - Name: "test_column1", - Type: arrow.PrimitiveTypes.Int64, - Resolver: testColumnResolverPanic, - }, - }, - } -} - -func testTableRelationSuccess() *schema.Table { - return &schema.Table{ - Name: "test_table_relation_success", - Resolver: testResolverSuccess, - Columns: []schema.Column{ - { - Name: "test_column", - Type: arrow.PrimitiveTypes.Int64, - }, - }, - Relations: []*schema.Table{ - testTableSuccess(), - }, - } -} - -func (*testExecutionClient) ID() string { - return "testExecutionClient" -} - -func newTestExecutionClient(context.Context, zerolog.Logger, specs.Source, Options) (schema.ClientMeta, error) { - return &testExecutionClient{}, nil -} - -type syncTestCase struct { - table *schema.Table - stats Metrics - data []scalar.Vector - deterministicCQID bool -} - -var syncTestCases = []syncTestCase{ - { - table: testTableSuccess(), - stats: Metrics{ - TableClient: map[string]map[string]*TableClientMetrics{ - "test_table_success": { - "testExecutionClient": { - Resources: 1, - }, - }, - }, - }, - data: []scalar.Vector{ - { - &scalar.String{Value: "testSource", Valid: true}, - &scalar.Timestamp{Value: testSyncTime, Valid: true}, - &scalar.UUID{Value: randomStableUUID, Valid: true}, - &scalar.UUID{}, - &scalar.Int64{Value: 3, Valid: true}, - }, - }, - }, - { - table: testTableResolverPanic(), - stats: Metrics{ - TableClient: map[string]map[string]*TableClientMetrics{ - "test_table_resolver_panic": { - "testExecutionClient": { - Panics: 1, - }, - }, - }, - }, - data: nil, - }, - { - table: testTablePreResourceResolverPanic(), - stats: Metrics{ - TableClient: map[string]map[string]*TableClientMetrics{ - "test_table_pre_resource_resolver_panic": { - "testExecutionClient": { - Panics: 1, - }, - }, - }, - }, - data: nil, - }, - - { - table: testTableRelationSuccess(), - stats: Metrics{ - TableClient: map[string]map[string]*TableClientMetrics{ - "test_table_relation_success": { - "testExecutionClient": { - Resources: 1, - }, - }, - "test_table_success": { - "testExecutionClient": { - Resources: 1, - }, - }, - }, - }, - data: []scalar.Vector{ - { - &scalar.String{Value: "testSource", Valid: true}, - &scalar.Timestamp{Value: testSyncTime, Valid: true}, - &scalar.UUID{Value: randomStableUUID, Valid: true}, - &scalar.UUID{}, - &scalar.Int64{Value: 3, Valid: true}, - }, - { - &scalar.String{Value: "testSource", Valid: true}, - &scalar.Timestamp{Value: testSyncTime, Valid: true}, - &scalar.UUID{Value: randomStableUUID, Valid: true}, - &scalar.UUID{Value: randomStableUUID, Valid: true}, - &scalar.Int64{Value: 3, Valid: true}, - }, - }, - }, - { - table: testTableSuccess(), - stats: Metrics{ - TableClient: map[string]map[string]*TableClientMetrics{ - "test_table_success": { - "testExecutionClient": { - Resources: 1, - }, - }, - }, - }, - data: []scalar.Vector{ - { - &scalar.String{Value: "testSource", Valid: true}, - &scalar.Timestamp{Value: testSyncTime, Valid: true}, - &scalar.UUID{Value: randomStableUUID, Valid: true}, - &scalar.UUID{}, - &scalar.Int64{Value: 3, Valid: true}, - }, - }, - deterministicCQID: true, - }, - { - table: testTableColumnResolverPanic(), - stats: Metrics{ - TableClient: map[string]map[string]*TableClientMetrics{ - "test_table_column_resolver_panic": { - "testExecutionClient": { - Panics: 1, - Resources: 1, - }, - }, - }, - }, - data: []scalar.Vector{ - { - &scalar.String{Value: "testSource", Valid: true}, - &scalar.Timestamp{Value: testSyncTime, Valid: true}, - &scalar.UUID{Value: randomStableUUID, Valid: true}, - &scalar.UUID{}, - &scalar.Int64{Value: 3, Valid: true}, - &scalar.Int64{}, - }, - }, - deterministicCQID: true, - }, - { - table: testTableRelationSuccess(), - stats: Metrics{ - TableClient: map[string]map[string]*TableClientMetrics{ - "test_table_relation_success": { - "testExecutionClient": { - Resources: 1, - }, - }, - "test_table_success": { - "testExecutionClient": { - Resources: 1, - }, - }, - }, - }, - data: []scalar.Vector{ - { - &scalar.String{Value: "testSource", Valid: true}, - &scalar.Timestamp{Value: testSyncTime, Valid: true}, - &scalar.UUID{Value: randomStableUUID, Valid: true}, - &scalar.UUID{}, - &scalar.Int64{Value: 3, Valid: true}, - }, - { - &scalar.String{Value: "testSource", Valid: true}, - &scalar.Timestamp{Value: testSyncTime, Valid: true}, - &scalar.UUID{Value: randomStableUUID, Valid: true}, - &scalar.UUID{Value: randomStableUUID, Valid: true}, - &scalar.Int64{Value: 3, Valid: true}, - }, - }, - deterministicCQID: true, - }, - { - table: testTableSuccessWithPK(), - stats: Metrics{ - TableClient: map[string]map[string]*TableClientMetrics{ - "test_table_success": { - "testExecutionClient": { - Resources: 1, - }, - }, - }, - }, - data: []scalar.Vector{ - { - &scalar.String{Value: "testSource", Valid: true}, - &scalar.Timestamp{Value: testSyncTime, Valid: true}, - &scalar.UUID{Value: deterministicStableUUID, Valid: true}, - &scalar.UUID{}, - &scalar.Int64{Value: 3, Valid: true}, - }, - }, - deterministicCQID: true, - }, -} - -type testRand struct{} - -func (testRand) Read(p []byte) (n int, err error) { - for i := range p { - p[i] = byte(0) - } - return len(p), nil -} - -func TestSync(t *testing.T) { - uuid.SetRand(testRand{}) - for _, scheduler := range specs.AllSchedulers { - for _, tc := range syncTestCases { - tc := tc - tc.table = tc.table.Copy(nil) - t.Run(tc.table.Name+"_"+scheduler.String(), func(t *testing.T) { - testSyncTable(t, tc, scheduler, tc.deterministicCQID) - }) - } - } -} - -func testSyncTable(t *testing.T, tc syncTestCase, scheduler specs.Scheduler, deterministicCQID bool) { - ctx := context.Background() - tables := []*schema.Table{ - tc.table, - } - - plugin := NewPlugin( - "testSourcePlugin", - "1.0.0", - tables, - newTestExecutionClient, - ) - plugin.SetLogger(zerolog.New(zerolog.NewTestWriter(t))) - spec := specs.Source{ - Name: "testSource", - Path: "cloudquery/testSource", - Tables: []string{"*"}, - Version: "v1.0.0", - Destinations: []string{"test"}, - Concurrency: 1, // choose a very low value to check that we don't run into deadlocks - Scheduler: scheduler, - DeterministicCQID: deterministicCQID, - } - if err := plugin.Init(ctx, spec); err != nil { - t.Fatal(err) - } - - resources := make(chan *schema.Resource) - g, ctx := errgroup.WithContext(ctx) - g.Go(func() error { - defer close(resources) - return plugin.Sync(ctx, - testSyncTime, - resources) - }) - - var i int - for resource := range resources { - if tc.data == nil { - t.Fatalf("Unexpected resource %v", resource) - } - if i >= len(tc.data) { - t.Fatalf("expected %d resources. got %d", len(tc.data), i) - } - if !resource.GetValues().Equal(tc.data[i]) { - t.Fatalf("expected at i=%d: %v. got %v", i, tc.data[i], resource.GetValues()) - } - i++ - } - if len(tc.data) != i { - t.Fatalf("expected %d resources. got %d", len(tc.data), i) - } - - stats := plugin.Metrics() - if !tc.stats.Equal(stats) { - t.Fatalf("unexpected stats: %v", cmp.Diff(tc.stats, stats)) - } - if err := g.Wait(); err != nil { - t.Fatal(err) - } -} - -func TestIgnoredColumns(t *testing.T) { - validateResources(t, schema.Resources{{ - Item: struct{ A *string }{}, - Table: &schema.Table{ - Columns: schema.ColumnList{ - { - Name: "a", - Type: arrow.BinaryTypes.String, - IgnoreInTests: true, - }, - }, - }, - }}) -} - -var testTable struct { - PrimaryKey string - SecondaryKey string - TertiaryKey string - Quaternary string -} - -func TestNewPluginPrimaryKeys(t *testing.T) { - testTransforms := []struct { - transformerOptions []transformers.StructTransformerOption - resultKeys []string - }{ - { - transformerOptions: []transformers.StructTransformerOption{transformers.WithPrimaryKeys("PrimaryKey")}, - resultKeys: []string{"primary_key"}, - }, - { - transformerOptions: []transformers.StructTransformerOption{}, - resultKeys: []string{"_cq_id"}, - }, - } - for _, tc := range testTransforms { - tables := []*schema.Table{ - { - Name: "test_table", - Transform: transformers.TransformWithStruct( - &testTable, tc.transformerOptions..., - ), - }, - } - - plugin := NewPlugin("testSourcePlugin", "1.0.0", tables, newTestExecutionClient) - assert.Equal(t, tc.resultKeys, plugin.tables[0].PrimaryKeys()) - } -} diff --git a/plugin/scheduler.go b/plugin/scheduler.go index 373147d194..c00ed2c8a9 100644 --- a/plugin/scheduler.go +++ b/plugin/scheduler.go @@ -9,7 +9,7 @@ import ( "sync/atomic" "time" - "github.com/cloudquery/plugin-sdk/v3/schema" + "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/getsentry/sentry-go" "github.com/rs/zerolog" "github.com/thoas/go-funk" diff --git a/plugin/scheduler_dfs.go b/plugin/scheduler_dfs.go index 9390966395..9b592be865 100644 --- a/plugin/scheduler_dfs.go +++ b/plugin/scheduler_dfs.go @@ -8,9 +8,9 @@ import ( "sync" "sync/atomic" - pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v0" - "github.com/cloudquery/plugin-sdk/v3/helpers" - "github.com/cloudquery/plugin-sdk/v3/schema" + pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" + "github.com/cloudquery/plugin-sdk/v4/helpers" + "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/getsentry/sentry-go" "golang.org/x/sync/semaphore" ) diff --git a/plugin/scheduler_round_robin.go b/plugin/scheduler_round_robin.go index 0554f5489e..5c6e90b391 100644 --- a/plugin/scheduler_round_robin.go +++ b/plugin/scheduler_round_robin.go @@ -4,8 +4,8 @@ import ( "context" "sync" - pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v0" - "github.com/cloudquery/plugin-sdk/v3/schema" + pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" + "github.com/cloudquery/plugin-sdk/v4/schema" "golang.org/x/sync/semaphore" ) diff --git a/plugin/scheduler_round_robin_test.go b/plugin/scheduler_round_robin_test.go index daf7cc242f..428b13c8a6 100644 --- a/plugin/scheduler_round_robin_test.go +++ b/plugin/scheduler_round_robin_test.go @@ -3,7 +3,7 @@ package plugin import ( "testing" - "github.com/cloudquery/plugin-sdk/v3/schema" + "github.com/cloudquery/plugin-sdk/v4/schema" ) func TestRoundRobinInterleave(t *testing.T) { diff --git a/plugins/destination/plugin_testing_overwrite_delete_stale.go b/plugin/testing_overwrite_deletestale.go similarity index 91% rename from plugins/destination/plugin_testing_overwrite_delete_stale.go rename to plugin/testing_overwrite_deletestale.go index 4339bb1d43..788decd8a4 100644 --- a/plugins/destination/plugin_testing_overwrite_delete_stale.go +++ b/plugin/testing_overwrite_deletestale.go @@ -1,20 +1,21 @@ -package destination +package plugin import ( "context" "fmt" "time" - "github.com/cloudquery/plugin-pb-go/specs" - "github.com/cloudquery/plugin-sdk/v3/schema" - "github.com/cloudquery/plugin-sdk/v3/types" + "github.com/apache/arrow/go/v13/arrow/array" + pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" + "github.com/cloudquery/plugin-sdk/v4/schema" + "github.com/cloudquery/plugin-sdk/v4/types" "github.com/google/uuid" "github.com/rs/zerolog" ) -func (*PluginTestSuite) destinationPluginTestWriteOverwriteDeleteStale(ctx context.Context, p *Plugin, logger zerolog.Logger, spec specs.Destination, testOpts PluginTestSuiteRunnerOptions) error { - spec.WriteMode = specs.WriteModeOverwriteDeleteStale - if err := p.Init(ctx, logger, spec); err != nil { +func (*PluginTestSuite) destinationPluginTestWriteOverwriteDeleteStale(ctx context.Context, p *Plugin, logger zerolog.Logger, spec pbPlugin.Spec, testOpts PluginTestSuiteRunnerOptions) error { + spec.WriteSpec.WriteMode = pbPlugin.WRITE_MODE_WRITE_MODE_OVERWRITE_DELETE_STALE + if err := p.Init(ctx, spec); err != nil { return fmt.Errorf("failed to init plugin: %w", err) } tableName := fmt.Sprintf("cq_%s_%d", spec.Name, time.Now().Unix()) @@ -31,9 +32,9 @@ func (*PluginTestSuite) destinationPluginTestWriteOverwriteDeleteStale(ctx conte } sourceName := "testOverwriteSource" + uuid.NewString() - sourceSpec := specs.Source{ + sourceSpec := pbPlugin.Spec{ Name: sourceName, - Backend: specs.BackendLocal, + // Backend: specs.BackendLocal, } opts := schema.GenTestDataOptions{ @@ -149,4 +150,4 @@ func (*PluginTestSuite) destinationPluginTestWriteOverwriteDeleteStale(ctx conte } return nil -} +} \ No newline at end of file diff --git a/plugin/testing.go b/plugin/testing_sync.go similarity index 73% rename from plugin/testing.go rename to plugin/testing_sync.go index 562da87461..0c0d6f939e 100644 --- a/plugin/testing.go +++ b/plugin/testing_sync.go @@ -2,11 +2,15 @@ package plugin import ( "context" + "fmt" + "strings" "testing" "time" - pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v0" - "github.com/cloudquery/plugin-sdk/v3/schema" + "github.com/apache/arrow/go/v13/arrow" + "github.com/apache/arrow/go/v13/arrow/array" + pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" + "github.com/cloudquery/plugin-sdk/v4/schema" ) type Validator func(t *testing.T, plugin *Plugin, resources []*schema.Resource) @@ -25,7 +29,7 @@ func TestPluginSync(t *testing.T, plugin *Plugin, spec pbPlugin.Spec, opts ...Te t.Parallel() } - resourcesChannel := make(chan *schema.Resource) + resourcesChannel := make(chan arrow.Record) var syncErr error if err := plugin.Init(context.Background(), spec); err != nil { @@ -37,16 +41,16 @@ func TestPluginSync(t *testing.T, plugin *Plugin, spec pbPlugin.Spec, opts ...Te syncErr = plugin.Sync(context.Background(), time.Now(), *spec.SyncSpec, resourcesChannel) }() - syncedResources := make([]*schema.Resource, 0) + syncedResources := make([]arrow.Record, 0) for resource := range resourcesChannel { syncedResources = append(syncedResources, resource) } if syncErr != nil { t.Fatal(syncErr) } - for _, validator := range o.validators { - validator(t, plugin, syncedResources) - } + // for _, validator := range o.validators { + // validator(t, plugin, syncedResources) + // } } type TestPluginOption func(*testPluginOptions) @@ -139,3 +143,27 @@ func validateResources(t *testing.T, resources []*schema.Resource) { } } } + +func RecordDiff(l arrow.Record, r arrow.Record) string { + var sb strings.Builder + if l.NumCols() != r.NumCols() { + return fmt.Sprintf("different number of columns: %d vs %d", l.NumCols(), r.NumCols()) + } + if l.NumRows() != r.NumRows() { + return fmt.Sprintf("different number of rows: %d vs %d", l.NumRows(), r.NumRows()) + } + for i := 0; i < int(l.NumCols()); i++ { + edits, err := array.Diff(l.Column(i), r.Column(i)) + if err != nil { + panic(fmt.Sprintf("left: %v, right: %v, error: %v", l.Column(i).DataType(), r.Column(i).DataType(), err)) + } + diff := edits.UnifiedDiff(l.Column(i), r.Column(i)) + if diff != "" { + sb.WriteString(l.Schema().Field(i).Name) + sb.WriteString(": ") + sb.WriteString(diff) + sb.WriteString("\n") + } + } + return sb.String() +} diff --git a/plugins/destination/plugin_testing.go b/plugin/testing_write.go similarity index 88% rename from plugins/destination/plugin_testing.go rename to plugin/testing_write.go index c3ee806aed..17fc3f6100 100644 --- a/plugins/destination/plugin_testing.go +++ b/plugin/testing_write.go @@ -1,4 +1,4 @@ -package destination +package plugin import ( "context" @@ -10,9 +10,9 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/array" - "github.com/cloudquery/plugin-pb-go/specs" - "github.com/cloudquery/plugin-sdk/v3/schema" - "github.com/cloudquery/plugin-sdk/v3/types" + pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" + "github.com/cloudquery/plugin-sdk/v4/schema" + "github.com/cloudquery/plugin-sdk/v4/types" "github.com/rs/zerolog" ) @@ -22,11 +22,11 @@ type PluginTestSuite struct { // MigrateStrategy defines which tests we should include type MigrateStrategy struct { - AddColumn specs.MigrateMode - AddColumnNotNull specs.MigrateMode - RemoveColumn specs.MigrateMode - RemoveColumnNotNull specs.MigrateMode - ChangeColumn specs.MigrateMode + AddColumn pbPlugin.WriteSpec_MIGRATE_MODE + AddColumnNotNull pbPlugin.WriteSpec_MIGRATE_MODE + RemoveColumn pbPlugin.WriteSpec_MIGRATE_MODE + RemoveColumnNotNull pbPlugin.WriteSpec_MIGRATE_MODE + ChangeColumn pbPlugin.WriteSpec_MIGRATE_MODE } type PluginTestSuiteTests struct { @@ -167,7 +167,7 @@ func WithTestSourceSkipDecimals() func(o *PluginTestSuiteRunnerOptions) { } } -func PluginTestSuiteRunner(t *testing.T, newPlugin NewPluginFunc, destSpec specs.Destination, tests PluginTestSuiteTests, testOptions ...func(o *PluginTestSuiteRunnerOptions)) { +func PluginTestSuiteRunner(t *testing.T, newPlugin NewPluginFunc, destSpec pbPlugin.Spec, tests PluginTestSuiteTests, testOptions ...func(o *PluginTestSuiteRunnerOptions)) { t.Helper() destSpec.Name = "testsuite" @@ -222,8 +222,8 @@ func PluginTestSuiteRunner(t *testing.T, newPlugin NewPluginFunc, destSpec specs if suite.tests.SkipMigrateOverwrite { t.Skip("skipping " + t.Name()) } - destSpec.WriteMode = specs.WriteModeOverwrite - destSpec.MigrateMode = specs.MigrateModeSafe + destSpec.WriteSpec.WriteMode = pbPlugin.WRITE_MODE_WRITE_MODE_OVERWRITE + destSpec.WriteSpec.MigrateMode = pbPlugin.WriteSpec_SAFE destSpec.Name = "test_migrate_overwrite" suite.destinationPluginTestMigrate(ctx, t, newPlugin, logger, destSpec, tests.MigrateStrategyOverwrite, opts) }) @@ -233,8 +233,8 @@ func PluginTestSuiteRunner(t *testing.T, newPlugin NewPluginFunc, destSpec specs if suite.tests.SkipMigrateOverwriteForce { t.Skip("skipping " + t.Name()) } - destSpec.WriteMode = specs.WriteModeOverwrite - destSpec.MigrateMode = specs.MigrateModeForced + destSpec.WriteSpec.WriteMode = pbPlugin.WRITE_MODE_WRITE_MODE_OVERWRITE + destSpec.WriteSpec.MigrateMode = pbPlugin.WriteSpec_FORCE destSpec.Name = "test_migrate_overwrite_force" suite.destinationPluginTestMigrate(ctx, t, newPlugin, logger, destSpec, tests.MigrateStrategyOverwrite, opts) }) @@ -259,8 +259,8 @@ func PluginTestSuiteRunner(t *testing.T, newPlugin NewPluginFunc, destSpec specs if suite.tests.SkipMigrateAppend { t.Skip("skipping " + t.Name()) } - destSpec.WriteMode = specs.WriteModeAppend - destSpec.MigrateMode = specs.MigrateModeSafe + destSpec.WriteSpec.WriteMode = pbPlugin.WRITE_MODE_WRITE_MODE_APPEND + destSpec.WriteSpec.MigrateMode = pbPlugin.WriteSpec_SAFE destSpec.Name = "test_migrate_append" suite.destinationPluginTestMigrate(ctx, t, newPlugin, logger, destSpec, tests.MigrateStrategyAppend, opts) }) @@ -270,8 +270,8 @@ func PluginTestSuiteRunner(t *testing.T, newPlugin NewPluginFunc, destSpec specs if suite.tests.SkipMigrateAppendForce { t.Skip("skipping " + t.Name()) } - destSpec.WriteMode = specs.WriteModeAppend - destSpec.MigrateMode = specs.MigrateModeForced + destSpec.WriteSpec.WriteMode = pbPlugin.WRITE_MODE_WRITE_MODE_APPEND + destSpec.WriteSpec.MigrateMode = pbPlugin.WriteSpec_FORCE destSpec.Name = "test_migrate_append_force" suite.destinationPluginTestMigrate(ctx, t, newPlugin, logger, destSpec, tests.MigrateStrategyAppend, opts) }) @@ -291,4 +291,4 @@ func sortRecordsBySyncTime(table *schema.Table, records []arrow.Record) { } return first.Before(second) }) -} +} \ No newline at end of file diff --git a/plugins/destination/plugin_testing_write_append.go b/plugin/testing_write_append.go similarity index 85% rename from plugins/destination/plugin_testing_write_append.go rename to plugin/testing_write_append.go index a3f0445c27..d56d20287e 100644 --- a/plugins/destination/plugin_testing_write_append.go +++ b/plugin/testing_write_append.go @@ -1,19 +1,20 @@ -package destination +package plugin import ( "context" "fmt" "time" - "github.com/cloudquery/plugin-pb-go/specs" - "github.com/cloudquery/plugin-sdk/v3/schema" + "github.com/apache/arrow/go/v13/arrow/array" + pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" + "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/google/uuid" "github.com/rs/zerolog" ) -func (s *PluginTestSuite) destinationPluginTestWriteAppend(ctx context.Context, p *Plugin, logger zerolog.Logger, spec specs.Destination, testOpts PluginTestSuiteRunnerOptions) error { - spec.WriteMode = specs.WriteModeAppend - if err := p.Init(ctx, logger, spec); err != nil { +func (s *PluginTestSuite) destinationPluginTestWriteAppend(ctx context.Context, p *Plugin, logger zerolog.Logger, spec pbPlugin.Spec, testOpts PluginTestSuiteRunnerOptions) error { + spec.WriteSpec.WriteMode = pbPlugin.WRITE_MODE_WRITE_MODE_APPEND + if err := p.Init(ctx, spec); err != nil { return fmt.Errorf("failed to init plugin: %w", err) } tableName := fmt.Sprintf("cq_%s_%d", spec.Name, time.Now().Unix()) @@ -27,7 +28,7 @@ func (s *PluginTestSuite) destinationPluginTestWriteAppend(ctx context.Context, } sourceName := "testAppendSource" + uuid.NewString() - specSource := specs.Source{ + specSource := pbPlugin.Spec{ Name: sourceName, } @@ -92,4 +93,4 @@ func (s *PluginTestSuite) destinationPluginTestWriteAppend(ctx context.Context, } return nil -} +} \ No newline at end of file diff --git a/plugins/destination/plugin_testing_migrate.go b/plugin/testing_write_migrate.go similarity index 85% rename from plugins/destination/plugin_testing_migrate.go rename to plugin/testing_write_migrate.go index b28ef18f50..d0c8b54ea8 100644 --- a/plugins/destination/plugin_testing_migrate.go +++ b/plugin/testing_write_migrate.go @@ -1,4 +1,4 @@ -package destination +package plugin import ( "context" @@ -8,9 +8,10 @@ import ( "time" "github.com/apache/arrow/go/v13/arrow" - "github.com/cloudquery/plugin-pb-go/specs" - "github.com/cloudquery/plugin-sdk/v3/schema" - "github.com/cloudquery/plugin-sdk/v3/types" + "github.com/apache/arrow/go/v13/arrow/array" + pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" + "github.com/cloudquery/plugin-sdk/v4/schema" + "github.com/cloudquery/plugin-sdk/v4/types" "github.com/google/uuid" "github.com/rs/zerolog" "github.com/stretchr/testify/require" @@ -20,8 +21,8 @@ func tableUUIDSuffix() string { return strings.ReplaceAll(uuid.NewString(), "-", "_") } -func testMigration(ctx context.Context, _ *testing.T, p *Plugin, logger zerolog.Logger, spec specs.Destination, target *schema.Table, source *schema.Table, mode specs.MigrateMode, testOpts PluginTestSuiteRunnerOptions) error { - if err := p.Init(ctx, logger, spec); err != nil { +func testMigration(ctx context.Context, _ *testing.T, p *Plugin, logger zerolog.Logger, spec pbPlugin.Spec, target *schema.Table, source *schema.Table, mode pbPlugin.WriteSpec_MIGRATE_MODE, testOpts PluginTestSuiteRunnerOptions) error { + if err := p.Init(ctx, spec); err != nil { return fmt.Errorf("failed to init plugin: %w", err) } @@ -30,7 +31,7 @@ func testMigration(ctx context.Context, _ *testing.T, p *Plugin, logger zerolog. } sourceName := target.Name - sourceSpec := specs.Source{ + sourceSpec := pbPlugin.Spec{ Name: sourceName, } syncTime := time.Now().UTC().Round(1 * time.Second) @@ -64,7 +65,7 @@ func testMigration(ctx context.Context, _ *testing.T, p *Plugin, logger zerolog. return fmt.Errorf("failed to read all: %w", err) } sortRecordsBySyncTime(target, resourcesRead) - if mode == specs.MigrateModeSafe { + if mode == pbPlugin.WriteSpec_SAFE { if len(resourcesRead) != 2 { return fmt.Errorf("expected 2 resources after write, got %d", len(resourcesRead)) } @@ -90,14 +91,14 @@ func (*PluginTestSuite) destinationPluginTestMigrate( t *testing.T, newPlugin NewPluginFunc, logger zerolog.Logger, - spec specs.Destination, + spec pbPlugin.Spec, strategy MigrateStrategy, testOpts PluginTestSuiteRunnerOptions, ) { - spec.BatchSize = 1 + spec.WriteSpec.BatchSize = 1 t.Run("add_column", func(t *testing.T) { - if strategy.AddColumn == specs.MigrateModeForced && spec.MigrateMode == specs.MigrateModeSafe { + if strategy.AddColumn == pbPlugin.WriteSpec_FORCE && spec.WriteSpec.MigrateMode == pbPlugin.WriteSpec_SAFE { t.Skip("skipping as migrate mode is safe") return } @@ -133,7 +134,7 @@ func (*PluginTestSuite) destinationPluginTestMigrate( }) t.Run("add_column_not_null", func(t *testing.T) { - if strategy.AddColumnNotNull == specs.MigrateModeForced && spec.MigrateMode == specs.MigrateModeSafe { + if strategy.AddColumnNotNull == pbPlugin.WriteSpec_FORCE && spec.WriteSpec.MigrateMode == pbPlugin.WriteSpec_SAFE { t.Skip("skipping as migrate mode is safe") return } @@ -167,7 +168,7 @@ func (*PluginTestSuite) destinationPluginTestMigrate( }) t.Run("remove_column", func(t *testing.T) { - if strategy.RemoveColumn == specs.MigrateModeForced && spec.MigrateMode == specs.MigrateModeSafe { + if strategy.RemoveColumn == pbPlugin.WriteSpec_FORCE && spec.WriteSpec.MigrateMode == pbPlugin.WriteSpec_SAFE { t.Skip("skipping as migrate mode is safe") return } @@ -200,7 +201,7 @@ func (*PluginTestSuite) destinationPluginTestMigrate( }) t.Run("remove_column_not_null", func(t *testing.T) { - if strategy.RemoveColumnNotNull == specs.MigrateModeForced && spec.MigrateMode == specs.MigrateModeSafe { + if strategy.RemoveColumnNotNull == pbPlugin.WriteSpec_FORCE && spec.WriteSpec.MigrateMode == pbPlugin.WriteSpec_SAFE { t.Skip("skipping as migrate mode is safe") return } @@ -234,7 +235,7 @@ func (*PluginTestSuite) destinationPluginTestMigrate( }) t.Run("change_column", func(t *testing.T) { - if strategy.ChangeColumn == specs.MigrateModeForced && spec.MigrateMode == specs.MigrateModeSafe { + if strategy.ChangeColumn == pbPlugin.WriteSpec_FORCE && spec.WriteSpec.MigrateMode == pbPlugin.WriteSpec_SAFE { t.Skip("skipping as migrate mode is safe") return } @@ -272,12 +273,12 @@ func (*PluginTestSuite) destinationPluginTestMigrate( table := schema.TestTable(tableName, testOpts.TestSourceOptions) p := newPlugin() - require.NoError(t, p.Init(ctx, logger, spec)) + require.NoError(t, p.Init(ctx, spec)) require.NoError(t, p.Migrate(ctx, schema.Tables{table})) nonForced := spec - nonForced.MigrateMode = specs.MigrateModeSafe - require.NoError(t, p.Init(ctx, logger, nonForced)) + nonForced.WriteSpec.MigrateMode = pbPlugin.WriteSpec_SAFE + require.NoError(t, p.Init(ctx, nonForced)) require.NoError(t, p.Migrate(ctx, schema.Tables{table})) }) -} +} \ No newline at end of file diff --git a/plugins/destination/plugin_testing_overwrite.go b/plugin/testing_write_overwrite.go similarity index 87% rename from plugins/destination/plugin_testing_overwrite.go rename to plugin/testing_write_overwrite.go index f77285ff63..a7dba53037 100644 --- a/plugins/destination/plugin_testing_overwrite.go +++ b/plugin/testing_write_overwrite.go @@ -1,20 +1,21 @@ -package destination +package plugin import ( "context" "fmt" "time" - "github.com/cloudquery/plugin-pb-go/specs" - "github.com/cloudquery/plugin-sdk/v3/schema" - "github.com/cloudquery/plugin-sdk/v3/types" + "github.com/apache/arrow/go/v13/arrow/array" + pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" + "github.com/cloudquery/plugin-sdk/v4/schema" + "github.com/cloudquery/plugin-sdk/v4/types" "github.com/google/uuid" "github.com/rs/zerolog" ) -func (*PluginTestSuite) destinationPluginTestWriteOverwrite(ctx context.Context, p *Plugin, logger zerolog.Logger, spec specs.Destination, testOpts PluginTestSuiteRunnerOptions) error { - spec.WriteMode = specs.WriteModeOverwrite - if err := p.Init(ctx, logger, spec); err != nil { +func (*PluginTestSuite) destinationPluginTestWriteOverwrite(ctx context.Context, p *Plugin, logger zerolog.Logger, spec pbPlugin.Spec, testOpts PluginTestSuiteRunnerOptions) error { + spec.WriteSpec.WriteMode = pbPlugin.WRITE_MODE_WRITE_MODE_OVERWRITE + if err := p.Init(ctx, spec); err != nil { return fmt.Errorf("failed to init plugin: %w", err) } tableName := fmt.Sprintf("cq_%s_%d", spec.Name, time.Now().Unix()) @@ -28,7 +29,7 @@ func (*PluginTestSuite) destinationPluginTestWriteOverwrite(ctx context.Context, } sourceName := "testOverwriteSource" + uuid.NewString() - sourceSpec := specs.Source{ + sourceSpec := pbPlugin.Spec{ Name: sourceName, } @@ -108,4 +109,4 @@ func (*PluginTestSuite) destinationPluginTestWriteOverwrite(ctx context.Context, } return nil -} +} \ No newline at end of file diff --git a/plugin/validate.go b/plugin/validate.go index 0b21133b05..6f557e9c1f 100644 --- a/plugin/validate.go +++ b/plugin/validate.go @@ -3,7 +3,7 @@ package plugin import ( "fmt" - "github.com/cloudquery/plugin-sdk/v3/schema" + "github.com/cloudquery/plugin-sdk/v4/schema" ) func (p *Plugin) validate(tables schema.Tables) error { diff --git a/plugins/destination/metrics.go b/plugins/destination/metrics.go deleted file mode 100644 index d00613ecf8..0000000000 --- a/plugins/destination/metrics.go +++ /dev/null @@ -1,8 +0,0 @@ -package destination - -type Metrics struct { - // Errors number of errors / failed writes - Errors uint64 - // Writes number of successful writes - Writes uint64 -} diff --git a/plugins/destination/plugin.go b/plugins/destination/plugin.go deleted file mode 100644 index 1d40f6af80..0000000000 --- a/plugins/destination/plugin.go +++ /dev/null @@ -1,314 +0,0 @@ -package destination - -import ( - "context" - "fmt" - "sync" - "time" - - "github.com/apache/arrow/go/v13/arrow" - "github.com/cloudquery/plugin-pb-go/specs" - "github.com/cloudquery/plugin-sdk/v3/schema" - "github.com/rs/zerolog" -) - -type writerType int - -const ( - unmanaged writerType = iota - managed -) - -const ( - defaultBatchTimeoutSeconds = 20 - defaultBatchSize = 10000 - defaultBatchSizeBytes = 5 * 1024 * 1024 // 5 MiB -) - -type NewClientFunc func(context.Context, zerolog.Logger, specs.Destination) (Client, error) - -type ManagedWriter interface { - WriteTableBatch(ctx context.Context, table *schema.Table, data []arrow.Record) error -} - -type UnimplementedManagedWriter struct{} - -var _ ManagedWriter = UnimplementedManagedWriter{} - -func (UnimplementedManagedWriter) WriteTableBatch(context.Context, *schema.Table, []arrow.Record) error { - panic("WriteTableBatch not implemented") -} - -type UnmanagedWriter interface { - Write(ctx context.Context, tables schema.Tables, res <-chan arrow.Record) error - Metrics() Metrics -} - -var _ UnmanagedWriter = UnimplementedUnmanagedWriter{} - -type UnimplementedUnmanagedWriter struct{} - -func (UnimplementedUnmanagedWriter) Write(context.Context, schema.Tables, <-chan arrow.Record) error { - panic("Write not implemented") -} - -func (UnimplementedUnmanagedWriter) Metrics() Metrics { - panic("Metrics not implemented") -} - -type Client interface { - Migrate(ctx context.Context, tables schema.Tables) error - Read(ctx context.Context, table *schema.Table, sourceName string, res chan<- arrow.Record) error - ManagedWriter - UnmanagedWriter - DeleteStale(ctx context.Context, tables schema.Tables, sourceName string, syncTime time.Time) error - Close(ctx context.Context) error -} - -type ClientResource struct { - TableName string - Data []any -} - -type Option func(*Plugin) - -type Plugin struct { - // Name of destination plugin i.e postgresql,snowflake - name string - // Version of the destination plugin - version string - // Called upon configure call to validate and init configuration - newClient NewClientFunc - writerType writerType - // initialized destination client - client Client - // spec the client was initialized with - spec specs.Destination - // Logger to call, this logger is passed to the serve.Serve Client, if not define Serve will create one instead. - logger zerolog.Logger - - // This is in use if the user passed a managed client - metrics map[string]*Metrics - metricsLock *sync.RWMutex - - workers map[string]*worker - workersLock *sync.Mutex - - batchTimeout time.Duration - defaultBatchSize int - defaultBatchSizeBytes int -} - -func WithManagedWriter() Option { - return func(p *Plugin) { - p.writerType = managed - } -} - -func WithBatchTimeout(seconds int) Option { - return func(p *Plugin) { - p.batchTimeout = time.Duration(seconds) * time.Second - } -} - -func WithDefaultBatchSize(defaultBatchSize int) Option { - return func(p *Plugin) { - p.defaultBatchSize = defaultBatchSize - } -} - -func WithDefaultBatchSizeBytes(defaultBatchSizeBytes int) Option { - return func(p *Plugin) { - p.defaultBatchSizeBytes = defaultBatchSizeBytes - } -} - -// NewPlugin creates a new destination plugin -func NewPlugin(name string, version string, newClientFunc NewClientFunc, opts ...Option) *Plugin { - p := &Plugin{ - name: name, - version: version, - newClient: newClientFunc, - metrics: make(map[string]*Metrics), - metricsLock: &sync.RWMutex{}, - workers: make(map[string]*worker), - workersLock: &sync.Mutex{}, - batchTimeout: time.Duration(defaultBatchTimeoutSeconds) * time.Second, - defaultBatchSize: defaultBatchSize, - defaultBatchSizeBytes: defaultBatchSizeBytes, - } - if newClientFunc == nil { - // we do this check because we only call this during runtime later on so it can fail - // before the server starts - panic("newClientFunc can't be nil") - } - for _, opt := range opts { - opt(p) - } - return p -} - -func (p *Plugin) Name() string { - return p.name -} - -func (p *Plugin) Version() string { - return p.version -} - -func (p *Plugin) Metrics() Metrics { - switch p.writerType { - case unmanaged: - return p.client.Metrics() - case managed: - metrics := Metrics{} - p.metricsLock.RLock() - for _, m := range p.metrics { - metrics.Errors += m.Errors - metrics.Writes += m.Writes - } - p.metricsLock.RUnlock() - return metrics - default: - panic("unknown client type") - } -} - -// we need lazy loading because we want to be able to initialize after -func (p *Plugin) Init(ctx context.Context, logger zerolog.Logger, spec specs.Destination) error { - var err error - p.logger = logger - p.spec = spec - p.spec.SetDefaults(p.defaultBatchSize, p.defaultBatchSizeBytes) - p.client, err = p.newClient(ctx, logger, p.spec) - if err != nil { - return err - } - return nil -} - -// we implement all DestinationClient functions so we can hook into pre-post behavior -func (p *Plugin) Migrate(ctx context.Context, tables schema.Tables) error { - if err := checkDestinationColumns(tables); err != nil { - return err - } - return p.client.Migrate(ctx, tables) -} - -func (p *Plugin) readAll(ctx context.Context, table *schema.Table, sourceName string) ([]arrow.Record, error) { - var readErr error - ch := make(chan arrow.Record) - go func() { - defer close(ch) - readErr = p.Read(ctx, table, sourceName, ch) - }() - // nolint:prealloc - var resources []arrow.Record - for resource := range ch { - resources = append(resources, resource) - } - return resources, readErr -} - -func (p *Plugin) Read(ctx context.Context, table *schema.Table, sourceName string, res chan<- arrow.Record) error { - return p.client.Read(ctx, table, sourceName, res) -} - -// this function is currently used mostly for testing so it's not a public api -func (p *Plugin) writeOne(ctx context.Context, sourceSpec specs.Source, syncTime time.Time, resource arrow.Record) error { - resources := []arrow.Record{resource} - return p.writeAll(ctx, sourceSpec, syncTime, resources) -} - -// this function is currently used mostly for testing so it's not a public api -func (p *Plugin) writeAll(ctx context.Context, sourceSpec specs.Source, syncTime time.Time, resources []arrow.Record) error { - ch := make(chan arrow.Record, len(resources)) - for _, resource := range resources { - ch <- resource - } - close(ch) - tables := make(schema.Tables, 0) - tableNames := make(map[string]struct{}) - for _, resource := range resources { - sc := resource.Schema() - tableMD := sc.Metadata() - name, found := tableMD.GetValue(schema.MetadataTableName) - if !found { - return fmt.Errorf("missing table name") - } - if _, ok := tableNames[name]; ok { - continue - } - table, err := schema.NewTableFromArrowSchema(resource.Schema()) - if err != nil { - return err - } - tables = append(tables, table) - tableNames[table.Name] = struct{}{} - } - return p.Write(ctx, sourceSpec, tables, syncTime, ch) -} - -func (p *Plugin) Write(ctx context.Context, sourceSpec specs.Source, tables schema.Tables, syncTime time.Time, res <-chan arrow.Record) error { - syncTime = syncTime.UTC() - if err := checkDestinationColumns(tables); err != nil { - return err - } - switch p.writerType { - case unmanaged: - if err := p.writeUnmanaged(ctx, sourceSpec, tables, syncTime, res); err != nil { - return err - } - case managed: - if err := p.writeManagedTableBatch(ctx, sourceSpec, tables, syncTime, res); err != nil { - return err - } - default: - panic("unknown client type") - } - if p.spec.WriteMode == specs.WriteModeOverwriteDeleteStale { - tablesToDelete := tables - if sourceSpec.Backend != specs.BackendNone { - tablesToDelete = make(schema.Tables, 0, len(tables)) - for _, t := range tables { - if !t.IsIncremental { - tablesToDelete = append(tablesToDelete, t) - } - } - } - if err := p.DeleteStale(ctx, tablesToDelete, sourceSpec.Name, syncTime); err != nil { - return err - } - } - return nil -} - -func (p *Plugin) DeleteStale(ctx context.Context, tables schema.Tables, sourceName string, syncTime time.Time) error { - syncTime = syncTime.UTC() - return p.client.DeleteStale(ctx, tables, sourceName, syncTime) -} - -func (p *Plugin) Close(ctx context.Context) error { - return p.client.Close(ctx) -} - -func checkDestinationColumns(tables schema.Tables) error { - for _, table := range tables { - if table.Columns.Index(schema.CqSourceNameColumn.Name) == -1 { - return fmt.Errorf("table %s is missing column %s. please consider upgrading source plugin", table.Name, schema.CqSourceNameColumn.Name) - } - if table.Columns.Index(schema.CqSyncTimeColumn.Name) == -1 { - return fmt.Errorf("table %s is missing column %s. please consider upgrading source plugin", table.Name, schema.CqSourceNameColumn.Name) - } - column := table.Columns.Get(schema.CqIDColumn.Name) - if column != nil { - if !column.NotNull { - return fmt.Errorf("column %s.%s cannot be nullable. please consider upgrading source plugin", table.Name, schema.CqIDColumn.Name) - } - if !column.Unique { - return fmt.Errorf("column %s.%s must be unique. please consider upgrading source plugin", table.Name, schema.CqIDColumn.Name) - } - } - } - return nil -} diff --git a/plugins/destination/unmanaged_writer.go b/plugins/destination/unmanaged_writer.go deleted file mode 100644 index cdb3466b09..0000000000 --- a/plugins/destination/unmanaged_writer.go +++ /dev/null @@ -1,14 +0,0 @@ -package destination - -import ( - "context" - "time" - - "github.com/apache/arrow/go/v13/arrow" - "github.com/cloudquery/plugin-pb-go/specs" - "github.com/cloudquery/plugin-sdk/v3/schema" -) - -func (p *Plugin) writeUnmanaged(ctx context.Context, _ specs.Source, tables schema.Tables, _ time.Time, res <-chan arrow.Record) error { - return p.client.Write(ctx, tables, res) -} diff --git a/plugins/docs.go b/plugins/docs.go deleted file mode 100644 index 2e21a01945..0000000000 --- a/plugins/docs.go +++ /dev/null @@ -1,2 +0,0 @@ -// Package plugins defines APIs for source and destination plugins -package plugins diff --git a/plugins/source/benchmark_test.go b/plugins/source/benchmark_test.go deleted file mode 100644 index 71ccdc929d..0000000000 --- a/plugins/source/benchmark_test.go +++ /dev/null @@ -1,429 +0,0 @@ -package source - -import ( - "context" - "fmt" - "math/rand" - "sync" - "sync/atomic" - "testing" - "time" - - "github.com/apache/arrow/go/v13/arrow" - "github.com/cloudquery/plugin-pb-go/specs" - "github.com/cloudquery/plugin-sdk/v3/schema" - "github.com/rs/zerolog" - "golang.org/x/sync/errgroup" -) - -type BenchmarkScenario struct { - Client Client - Scheduler specs.Scheduler - Clients int - Tables int - ChildrenPerTable int - Columns int - ColumnResolvers int // number of columns with custom resolvers - ResourcesPerTable int - ResourcesPerPage int - NoPreResourceResolver bool - Concurrency uint64 -} - -func (s *BenchmarkScenario) SetDefaults() { - if s.Clients == 0 { - s.Clients = 1 - } - if s.Tables == 0 { - s.Tables = 1 - } - if s.Columns == 0 { - s.Columns = 10 - } - if s.ResourcesPerTable == 0 { - s.ResourcesPerTable = 100 - } - if s.ResourcesPerPage == 0 { - s.ResourcesPerPage = 10 - } -} - -type Client interface { - Call(clientID, tableName string) error -} - -type Benchmark struct { - *BenchmarkScenario - - b *testing.B - tables []*schema.Table - plugin *Plugin - - apiCalls atomic.Int64 -} - -func NewBenchmark(b *testing.B, scenario BenchmarkScenario) *Benchmark { - scenario.SetDefaults() - sb := &Benchmark{ - BenchmarkScenario: &scenario, - b: b, - tables: nil, - plugin: nil, - } - sb.setup(b) - return sb -} - -func (s *Benchmark) setup(b *testing.B) { - createResolvers := func(tableName string) (schema.TableResolver, schema.RowResolver, schema.ColumnResolver) { - tableResolver := func(ctx context.Context, meta schema.ClientMeta, parent *schema.Resource, res chan<- any) error { - total := 0 - for total < s.ResourcesPerTable { - s.simulateAPICall(meta.ID(), tableName) - num := min(s.ResourcesPerPage, s.ResourcesPerTable-total) - resources := make([]struct { - Column1 string - }, num) - for i := 0; i < num; i++ { - resources[i] = struct { - Column1 string - }{ - Column1: "test-column", - } - } - res <- resources - total += num - } - return nil - } - preResourceResolver := func(ctx context.Context, meta schema.ClientMeta, resource *schema.Resource) error { - s.simulateAPICall(meta.ID(), tableName) - resource.Item = struct { - Column1 string - }{ - Column1: "test-pre", - } - return nil - } - columnResolver := func(ctx context.Context, meta schema.ClientMeta, resource *schema.Resource, c schema.Column) error { - s.simulateAPICall(meta.ID(), tableName) - return resource.Set(c.Name, "test") - } - return tableResolver, preResourceResolver, columnResolver - } - - s.tables = make([]*schema.Table, s.Tables) - for i := 0; i < s.Tables; i++ { - tableResolver, preResourceResolver, columnResolver := createResolvers(fmt.Sprintf("table%d", i)) - columns := make([]schema.Column, s.Columns) - for u := 0; u < s.Columns; u++ { - columns[u] = schema.Column{ - Name: fmt.Sprintf("column%d", u), - Type: arrow.BinaryTypes.String, - } - if u < s.ColumnResolvers { - columns[u].Resolver = columnResolver - } - } - relations := make([]*schema.Table, s.ChildrenPerTable) - for u := 0; u < s.ChildrenPerTable; u++ { - relations[u] = &schema.Table{ - Name: fmt.Sprintf("table%d_child%d", i, u), - Columns: columns, - Resolver: tableResolver, - } - if !s.NoPreResourceResolver { - relations[u].PreResourceResolver = preResourceResolver - } - } - s.tables[i] = &schema.Table{ - Name: fmt.Sprintf("table%d", i), - Columns: columns, - Relations: relations, - Resolver: tableResolver, - Multiplex: nMultiplexer(s.Clients), - } - if !s.NoPreResourceResolver { - s.tables[i].PreResourceResolver = preResourceResolver - } - for u := range relations { - relations[u].Parent = s.tables[i] - } - } - - plugin := NewPlugin( - "testPlugin", - "1.0.0", - s.tables, - newTestExecutionClient, - ) - plugin.SetLogger(zerolog.New(zerolog.NewTestWriter(b)).Level(zerolog.WarnLevel)) - s.plugin = plugin - s.b = b -} - -func (s *Benchmark) simulateAPICall(clientID, tableName string) { - for { - s.apiCalls.Add(1) - err := s.Client.Call(clientID, tableName) - if err == nil { - // if no error, we are done - break - } - // if error, we have to retry - // we simulate a random backoff - time.Sleep(time.Duration(rand.Intn(100)) * time.Millisecond) - } -} - -func min(a, b int) int { - if a < b { - return a - } - return b -} - -func (s *Benchmark) Run() { - for n := 0; n < s.b.N; n++ { - s.b.StopTimer() - ctx := context.Background() - spec := specs.Source{ - Name: "testSource", - Path: "cloudquery/testSource", - Tables: []string{"*"}, - Version: "v1.0.0", - Destinations: []string{"test"}, - Concurrency: s.Concurrency, - Scheduler: s.Scheduler, - } - if err := s.plugin.Init(ctx, spec); err != nil { - s.b.Fatal(err) - } - resources := make(chan *schema.Resource) - g, ctx := errgroup.WithContext(ctx) - g.Go(func() error { - defer close(resources) - return s.plugin.Sync(ctx, - time.Now(), - resources) - }) - s.b.StartTimer() - start := time.Now() - - totalResources := 0 - for range resources { - // read resources channel until empty - totalResources++ - } - if err := g.Wait(); err != nil { - s.b.Fatal(err) - } - - end := time.Now() - s.b.ReportMetric(0, "ns/op") // drop default ns/op output - s.b.ReportMetric(float64(totalResources)/(end.Sub(start).Seconds()), "resources/s") - - // Enable the below metrics for more verbose information about the scenario: - // s.b.ReportMetric(float64(s.apiCalls.Load())/(end.Sub(start).Seconds()), "api-calls/s") - // s.b.ReportMetric(float64(totalResources), "resources") - // s.b.ReportMetric(float64(s.apiCalls.Load()), "apiCalls") - } -} - -type benchmarkClient struct { - num int -} - -func (b benchmarkClient) ID() string { - return fmt.Sprintf("client%d", b.num) -} - -func nMultiplexer(n int) schema.Multiplexer { - return func(meta schema.ClientMeta) []schema.ClientMeta { - clients := make([]schema.ClientMeta, n) - for i := 0; i < n; i++ { - clients[i] = benchmarkClient{ - num: i, - } - } - return clients - } -} - -func BenchmarkDefaultConcurrencyDFS(b *testing.B) { - benchmarkWithScheduler(b, specs.SchedulerDFS) -} - -func BenchmarkDefaultConcurrencyRoundRobin(b *testing.B) { - benchmarkWithScheduler(b, specs.SchedulerRoundRobin) -} - -func benchmarkWithScheduler(b *testing.B, scheduler specs.Scheduler) { - b.ReportAllocs() - minTime := 1 * time.Millisecond - mean := 10 * time.Millisecond - stdDev := 100 * time.Millisecond - client := NewDefaultClient(minTime, mean, stdDev) - bs := BenchmarkScenario{ - Client: client, - Clients: 25, - Tables: 5, - Columns: 10, - ColumnResolvers: 1, - ResourcesPerTable: 100, - ResourcesPerPage: 50, - Scheduler: scheduler, - } - sb := NewBenchmark(b, bs) - sb.Run() -} - -func BenchmarkTablesWithChildrenDFS(b *testing.B) { - benchmarkTablesWithChildrenScheduler(b, specs.SchedulerDFS) -} - -func BenchmarkTablesWithChildrenRoundRobin(b *testing.B) { - benchmarkTablesWithChildrenScheduler(b, specs.SchedulerRoundRobin) -} - -func benchmarkTablesWithChildrenScheduler(b *testing.B, scheduler specs.Scheduler) { - b.ReportAllocs() - minTime := 1 * time.Millisecond - mean := 10 * time.Millisecond - stdDev := 100 * time.Millisecond - client := NewDefaultClient(minTime, mean, stdDev) - bs := BenchmarkScenario{ - Client: client, - Clients: 2, - Tables: 2, - ChildrenPerTable: 2, - Columns: 10, - ColumnResolvers: 1, - ResourcesPerTable: 100, - ResourcesPerPage: 50, - Scheduler: scheduler, - } - sb := NewBenchmark(b, bs) - sb.Run() -} - -type DefaultClient struct { - min, stdDev, mean time.Duration -} - -func NewDefaultClient(min, mean, stdDev time.Duration) *DefaultClient { - if min == 0 { - min = time.Millisecond - } - if mean == 0 { - mean = 10 * time.Millisecond - } - if stdDev == 0 { - stdDev = 100 * time.Millisecond - } - return &DefaultClient{ - min: min, - mean: mean, - stdDev: stdDev, - } -} - -func (c *DefaultClient) Call(_, _ string) error { - sample := int(rand.NormFloat64()*float64(c.stdDev) + float64(c.mean)) - duration := time.Duration(sample) - if duration < c.min { - duration = c.min - } - time.Sleep(duration) - return nil -} - -type RateLimitClient struct { - *DefaultClient - calls map[string][]time.Time - callsLock sync.Mutex - window time.Duration - maxCallsPerWindow int -} - -func NewRateLimitClient(min, mean, stdDev time.Duration, maxCallsPerWindow int, window time.Duration) *RateLimitClient { - return &RateLimitClient{ - DefaultClient: NewDefaultClient(min, mean, stdDev), - calls: map[string][]time.Time{}, - window: window, - maxCallsPerWindow: maxCallsPerWindow, - } -} - -func (r *RateLimitClient) Call(clientID, table string) error { - // this will sleep for the appropriate amount of time before responding - err := r.DefaultClient.Call(clientID, table) - if err != nil { - return err - } - - r.callsLock.Lock() - defer r.callsLock.Unlock() - - // limit the number of calls per window by table - key := table - - // remove calls from outside the call window - updated := make([]time.Time, 0, len(r.calls[key])) - for i := range r.calls[key] { - if time.Since(r.calls[key][i]) < r.window { - updated = append(updated, r.calls[key][i]) - } - } - - // return error if we've exceeded the max calls in the time window - if len(updated) >= r.maxCallsPerWindow { - return fmt.Errorf("rate limit exceeded") - } - - r.calls[key] = append(r.calls[key], time.Now()) - return nil -} - -// BenchmarkDefaultConcurrency represents a benchmark scenario where rate limiting is applied -// by the cloud provider. In this rate limiter, the limit is applied globally per table. -// This mirrors the behavior of GCP, where rate limiting is applied per project *token*, not -// per project. A good scheduler should spread the load across tables so that other tables can make -// progress while waiting for the rate limit to reset. -func BenchmarkTablesWithRateLimitingDFS(b *testing.B) { - benchmarkTablesWithRateLimitingScheduler(b, specs.SchedulerDFS) -} - -func BenchmarkTablesWithRateLimitingRoundRobin(b *testing.B) { - benchmarkTablesWithRateLimitingScheduler(b, specs.SchedulerRoundRobin) -} - -// In this benchmark, we set up a scenario where each table has a global rate limit of 1 call per 100ms. -// Every table requires 1 call to resolve, and has 10 clients. This means, at best, each table can resolve in 1 second. -// We have 100 such tables and a concurrency that allows 1000 calls at a time. A good scheduler for this scenario -// should be able to resolve all tables in a bit more than 1 second. -func benchmarkTablesWithRateLimitingScheduler(b *testing.B, scheduler specs.Scheduler) { - b.ReportAllocs() - minTime := 1 * time.Millisecond - mean := 1 * time.Millisecond - stdDev := 1 * time.Millisecond - maxCallsPerWindow := 1 - window := 100 * time.Millisecond - c := NewRateLimitClient(minTime, mean, stdDev, maxCallsPerWindow, window) - - bs := BenchmarkScenario{ - Client: c, - Scheduler: scheduler, - Clients: 10, - Tables: 100, - ChildrenPerTable: 0, - Columns: 10, - ColumnResolvers: 0, - ResourcesPerTable: 1, - ResourcesPerPage: 1, - Concurrency: 1000, - NoPreResourceResolver: true, - } - sb := NewBenchmark(b, bs) - sb.Run() -} diff --git a/plugins/source/docs.go b/plugins/source/docs.go deleted file mode 100644 index f21d926856..0000000000 --- a/plugins/source/docs.go +++ /dev/null @@ -1,241 +0,0 @@ -package source - -import ( - "bytes" - "embed" - "encoding/json" - "fmt" - "os" - "path/filepath" - "regexp" - "sort" - "text/template" - - "github.com/cloudquery/plugin-sdk/v3/caser" - "github.com/cloudquery/plugin-sdk/v3/schema" -) - -//go:embed templates/*.go.tpl -var templatesFS embed.FS - -var reMatchNewlines = regexp.MustCompile(`\n{3,}`) -var reMatchHeaders = regexp.MustCompile(`(#{1,6}.+)\n+`) - -var DefaultTitleExceptions = map[string]string{ - // common abbreviations - "acl": "ACL", - "acls": "ACLs", - "api": "API", - "apis": "APIs", - "ca": "CA", - "cidr": "CIDR", - "cidrs": "CIDRs", - "db": "DB", - "dbs": "DBs", - "dhcp": "DHCP", - "iam": "IAM", - "iot": "IOT", - "ip": "IP", - "ips": "IPs", - "ipv4": "IPv4", - "ipv6": "IPv6", - "mfa": "MFA", - "ml": "ML", - "oauth": "OAuth", - "vpc": "VPC", - "vpcs": "VPCs", - "vpn": "VPN", - "vpns": "VPNs", - "waf": "WAF", - "wafs": "WAFs", - - // cloud providers - "aws": "AWS", - "gcp": "GCP", -} - -func DefaultTitleTransformer(table *schema.Table) string { - if table.Title != "" { - return table.Title - } - csr := caser.New(caser.WithCustomExceptions(DefaultTitleExceptions)) - return csr.ToTitle(table.Name) -} - -func sortTables(tables schema.Tables) { - sort.SliceStable(tables, func(i, j int) bool { - return tables[i].Name < tables[j].Name - }) - - for _, table := range tables { - sortTables(table.Relations) - } -} - -type templateData struct { - PluginName string - Tables schema.Tables -} - -// GeneratePluginDocs creates table documentation for the source plugin based on its list of tables -func (p *Plugin) GeneratePluginDocs(dir, format string) error { - if err := os.MkdirAll(dir, os.ModePerm); err != nil { - return err - } - - setDestinationManagedCqColumns(p.Tables()) - - sortedTables := make(schema.Tables, 0, len(p.Tables())) - for _, t := range p.Tables() { - sortedTables = append(sortedTables, t.Copy(nil)) - } - sortTables(sortedTables) - - switch format { - case "markdown": - return p.renderTablesAsMarkdown(dir, p.name, sortedTables) - case "json": - return p.renderTablesAsJSON(dir, sortedTables) - default: - return fmt.Errorf("unsupported format: %v", format) - } -} - -// setDestinationManagedCqColumns overwrites or adds the CQ columns that are managed by the destination plugins (_cq_sync_time, _cq_source_name). -func setDestinationManagedCqColumns(tables []*schema.Table) { - for _, table := range tables { - table.OverwriteOrAddColumn(&schema.CqSyncTimeColumn) - table.OverwriteOrAddColumn(&schema.CqSourceNameColumn) - setDestinationManagedCqColumns(table.Relations) - } -} - -type jsonTable struct { - Name string `json:"name"` - Title string `json:"title"` - Description string `json:"description"` - Columns []jsonColumn `json:"columns"` - Relations []jsonTable `json:"relations"` -} - -type jsonColumn struct { - Name string `json:"name"` - Type string `json:"type"` - IsPrimaryKey bool `json:"is_primary_key,omitempty"` - IsIncrementalKey bool `json:"is_incremental_key,omitempty"` -} - -func (p *Plugin) renderTablesAsJSON(dir string, tables schema.Tables) error { - jsonTables := p.jsonifyTables(tables) - buffer := &bytes.Buffer{} - m := json.NewEncoder(buffer) - m.SetIndent("", " ") - m.SetEscapeHTML(false) - err := m.Encode(jsonTables) - if err != nil { - return err - } - outputPath := filepath.Join(dir, "__tables.json") - return os.WriteFile(outputPath, buffer.Bytes(), 0644) -} - -func (p *Plugin) jsonifyTables(tables schema.Tables) []jsonTable { - jsonTables := make([]jsonTable, len(tables)) - for i, table := range tables { - jsonColumns := make([]jsonColumn, len(table.Columns)) - for c, col := range table.Columns { - jsonColumns[c] = jsonColumn{ - Name: col.Name, - Type: col.Type.String(), - IsPrimaryKey: col.PrimaryKey, - IsIncrementalKey: col.IncrementalKey, - } - } - jsonTables[i] = jsonTable{ - Name: table.Name, - Title: p.titleTransformer(table), - Description: table.Description, - Columns: jsonColumns, - Relations: p.jsonifyTables(table.Relations), - } - } - return jsonTables -} - -func (p *Plugin) renderTablesAsMarkdown(dir string, pluginName string, tables schema.Tables) error { - for _, table := range tables { - if err := p.renderAllTables(table, dir); err != nil { - return err - } - } - t, err := template.New("all_tables.md.go.tpl").Funcs(template.FuncMap{ - "indentToDepth": indentToDepth, - }).ParseFS(templatesFS, "templates/all_tables*.md.go.tpl") - if err != nil { - return fmt.Errorf("failed to parse template for README.md: %v", err) - } - - var b bytes.Buffer - if err := t.Execute(&b, templateData{PluginName: pluginName, Tables: tables}); err != nil { - return fmt.Errorf("failed to execute template: %v", err) - } - content := formatMarkdown(b.String()) - outputPath := filepath.Join(dir, "README.md") - f, err := os.Create(outputPath) - if err != nil { - return fmt.Errorf("failed to create file %v: %v", outputPath, err) - } - f.WriteString(content) - return nil -} - -func (p *Plugin) renderAllTables(t *schema.Table, dir string) error { - if err := p.renderTable(t, dir); err != nil { - return err - } - for _, r := range t.Relations { - if err := p.renderAllTables(r, dir); err != nil { - return err - } - } - return nil -} - -func (p *Plugin) renderTable(table *schema.Table, dir string) error { - t := template.New("").Funcs(map[string]any{ - "title": p.titleTransformer, - }) - t, err := t.New("table.md.go.tpl").ParseFS(templatesFS, "templates/table.md.go.tpl") - if err != nil { - return fmt.Errorf("failed to parse template: %v", err) - } - - outputPath := filepath.Join(dir, fmt.Sprintf("%s.md", table.Name)) - - var b bytes.Buffer - if err := t.Execute(&b, table); err != nil { - return fmt.Errorf("failed to execute template: %v", err) - } - content := formatMarkdown(b.String()) - f, err := os.Create(outputPath) - if err != nil { - return fmt.Errorf("failed to create file %v: %v", outputPath, err) - } - f.WriteString(content) - return f.Close() -} - -func formatMarkdown(s string) string { - s = reMatchNewlines.ReplaceAllString(s, "\n\n") - return reMatchHeaders.ReplaceAllString(s, `$1`+"\n\n") -} - -func indentToDepth(table *schema.Table) string { - s := "" - t := table - for t.Parent != nil { - s += " " - t = t.Parent - } - return s -} diff --git a/plugins/source/docs_test.go b/plugins/source/docs_test.go deleted file mode 100644 index 30d34814d3..0000000000 --- a/plugins/source/docs_test.go +++ /dev/null @@ -1,164 +0,0 @@ -//go:build !windows - -package source - -import ( - "os" - "path" - "testing" - - "github.com/apache/arrow/go/v13/arrow" - "github.com/bradleyjkemp/cupaloy/v2" - "github.com/cloudquery/plugin-sdk/v3/schema" - "github.com/cloudquery/plugin-sdk/v3/types" - "github.com/stretchr/testify/require" -) - -var testTables = []*schema.Table{ - { - Name: "test_table", - Description: "Description for test table", - Columns: []schema.Column{ - { - Name: "int_col", - Type: arrow.PrimitiveTypes.Int64, - }, - { - Name: "id_col", - Type: arrow.PrimitiveTypes.Int64, - PrimaryKey: true, - }, - { - Name: "id_col2", - Type: arrow.PrimitiveTypes.Int64, - PrimaryKey: true, - }, - { - Name: "json_col", - Type: types.ExtensionTypes.JSON, - }, - { - Name: "list_col", - Type: arrow.ListOf(arrow.PrimitiveTypes.Int64), - }, - { - Name: "map_col", - Type: arrow.MapOf(arrow.BinaryTypes.String, arrow.PrimitiveTypes.Int64), - }, - { - Name: "struct_col", - Type: arrow.StructOf(arrow.Field{Name: "string_field", Type: arrow.BinaryTypes.String}, arrow.Field{Name: "int_field", Type: arrow.PrimitiveTypes.Int64}), - }, - }, - Relations: []*schema.Table{ - { - Name: "relation_table", - Description: "Description for relational table", - Columns: []schema.Column{ - { - Name: "string_col", - Type: arrow.BinaryTypes.String, - }, - }, - Relations: []*schema.Table{ - { - Name: "relation_relation_table_b", - Description: "Description for relational table's relation", - Columns: []schema.Column{ - { - Name: "string_col", - Type: arrow.BinaryTypes.String, - }, - }, - }, - { - Name: "relation_relation_table_a", - Description: "Description for relational table's relation", - Columns: []schema.Column{ - { - Name: "string_col", - Type: arrow.BinaryTypes.String, - }, - }, - }, - }, - }, - { - Name: "relation_table2", - Description: "Description for second relational table", - Columns: []schema.Column{ - { - Name: "string_col", - Type: arrow.BinaryTypes.String, - }, - }, - }, - }, - }, - { - Name: "incremental_table", - Description: "Description for incremental table", - IsIncremental: true, - Columns: []schema.Column{ - { - Name: "int_col", - Type: arrow.PrimitiveTypes.Int64, - }, - { - Name: "id_col", - Type: arrow.PrimitiveTypes.Int64, - PrimaryKey: true, - IncrementalKey: true, - }, - { - Name: "id_col2", - Type: arrow.PrimitiveTypes.Int64, - IncrementalKey: true, - }, - }, - }, -} - -func TestGeneratePluginDocs(t *testing.T) { - p := NewPlugin("test", "v1.0.0", testTables, newTestExecutionClient) - - cup := cupaloy.New(cupaloy.SnapshotSubdirectory("testdata")) - - t.Run("Markdown", func(t *testing.T) { - tmpdir := t.TempDir() - - err := p.GeneratePluginDocs(tmpdir, "markdown") - if err != nil { - t.Fatalf("unexpected error calling GeneratePluginDocs: %v", err) - } - - expectFiles := []string{"test_table.md", "relation_table.md", "relation_relation_table_a.md", "relation_relation_table_b.md", "incremental_table.md", "README.md"} - for _, exp := range expectFiles { - t.Run(exp, func(t *testing.T) { - output := path.Join(tmpdir, exp) - got, err := os.ReadFile(output) - require.NoError(t, err) - cup.SnapshotT(t, got) - }) - } - }) - - t.Run("JSON", func(t *testing.T) { - tmpdir := t.TempDir() - - err := p.GeneratePluginDocs(tmpdir, "json") - if err != nil { - t.Fatalf("unexpected error calling GeneratePluginDocs: %v", err) - } - - expectFiles := []string{"__tables.json"} - for _, exp := range expectFiles { - t.Run(exp, func(t *testing.T) { - output := path.Join(tmpdir, exp) - got, err := os.ReadFile(output) - require.NoError(t, err) - cup.SnapshotT(t, got) - }) - } - }) -} diff --git a/plugins/source/metrics.go b/plugins/source/metrics.go deleted file mode 100644 index 9975933779..0000000000 --- a/plugins/source/metrics.go +++ /dev/null @@ -1,207 +0,0 @@ -package source - -import ( - "sync" - "sync/atomic" - "time" - - "github.com/cloudquery/plugin-sdk/v3/schema" - "golang.org/x/exp/slices" -) - -type Metrics struct { - TableClient map[string]map[string]*TableClientMetrics -} - -type TableClientMetrics struct { - // These should only be accessed with 'Atomic*' methods. - Resources uint64 - Errors uint64 - Panics uint64 - - // These accesses must be protected by the mutex. - startTime time.Time - endTime time.Time - mutex sync.Mutex -} - -func (s *TableClientMetrics) Equal(other *TableClientMetrics) bool { - return s.Resources == other.Resources && s.Errors == other.Errors && s.Panics == other.Panics -} - -// Equal compares to stats. Mostly useful in testing -func (s *Metrics) Equal(other *Metrics) bool { - for table, clientStats := range s.TableClient { - for client, stats := range clientStats { - if _, ok := other.TableClient[table]; !ok { - return false - } - if _, ok := other.TableClient[table][client]; !ok { - return false - } - if !stats.Equal(other.TableClient[table][client]) { - return false - } - } - } - for table, clientStats := range other.TableClient { - for client, stats := range clientStats { - if _, ok := s.TableClient[table]; !ok { - return false - } - if _, ok := s.TableClient[table][client]; !ok { - return false - } - if !stats.Equal(s.TableClient[table][client]) { - return false - } - } - } - return true -} - -func (s *Metrics) initWithClients(table *schema.Table, clients []schema.ClientMeta) { - s.TableClient[table.Name] = make(map[string]*TableClientMetrics, len(clients)) - for _, client := range clients { - s.TableClient[table.Name][client.ID()] = &TableClientMetrics{} - } - for _, relation := range table.Relations { - s.initWithClients(relation, clients) - } -} - -func (s *Metrics) TotalErrors() uint64 { - var total uint64 - for _, clientMetrics := range s.TableClient { - for _, metrics := range clientMetrics { - total += metrics.Errors - } - } - return total -} - -func (s *Metrics) TotalErrorsAtomic() uint64 { - var total uint64 - for _, clientMetrics := range s.TableClient { - for _, metrics := range clientMetrics { - total += atomic.LoadUint64(&metrics.Errors) - } - } - return total -} - -func (s *Metrics) TotalPanics() uint64 { - var total uint64 - for _, clientMetrics := range s.TableClient { - for _, metrics := range clientMetrics { - total += metrics.Panics - } - } - return total -} - -func (s *Metrics) TotalPanicsAtomic() uint64 { - var total uint64 - for _, clientMetrics := range s.TableClient { - for _, metrics := range clientMetrics { - total += atomic.LoadUint64(&metrics.Panics) - } - } - return total -} - -func (s *Metrics) TotalResources() uint64 { - var total uint64 - for _, clientMetrics := range s.TableClient { - for _, metrics := range clientMetrics { - total += metrics.Resources - } - } - return total -} - -func (s *Metrics) TotalResourcesAtomic() uint64 { - var total uint64 - for _, clientMetrics := range s.TableClient { - for _, metrics := range clientMetrics { - total += atomic.LoadUint64(&metrics.Resources) - } - } - return total -} - -func (s *Metrics) MarkStart(table *schema.Table, clientID string) { - now := time.Now() - - s.TableClient[table.Name][clientID].mutex.Lock() - defer s.TableClient[table.Name][clientID].mutex.Unlock() - s.TableClient[table.Name][clientID].startTime = now -} - -// if the table is a top-level table, we need to mark all of its descendents as 'done' as well. -// This is because, when a top-level table is empty (no resources), its descendants are never actually -// synced. -func (s *Metrics) MarkEnd(table *schema.Table, clientID string) { - now := time.Now() - - if table.Parent == nil { - s.markEndRecursive(table, clientID, now) - return - } - - s.TableClient[table.Name][clientID].mutex.Lock() - defer s.TableClient[table.Name][clientID].mutex.Unlock() - s.TableClient[table.Name][clientID].endTime = now -} - -func (s *Metrics) markEndRecursive(table *schema.Table, clientID string, now time.Time) { - // We don't use defer with Unlock(), because we want to unlock the mutex as soon as possible. - s.TableClient[table.Name][clientID].mutex.Lock() - s.TableClient[table.Name][clientID].endTime = now - s.TableClient[table.Name][clientID].mutex.Unlock() - - for _, relation := range table.Relations { - s.markEndRecursive(relation, clientID, now) - } -} - -func (s *Metrics) InProgressTables() []string { - var inProgressTables []string - - for table, tableMetrics := range s.TableClient { - for _, clientMetrics := range tableMetrics { - clientMetrics.mutex.Lock() - endTime := clientMetrics.endTime - startTime := clientMetrics.startTime - clientMetrics.mutex.Unlock() - if endTime.IsZero() && !startTime.IsZero() { - inProgressTables = append(inProgressTables, table) - break - } - } - } - - slices.Sort(inProgressTables) - - return inProgressTables -} - -func (s *Metrics) QueuedTables() []string { - var queuedTables []string - - for table, tableMetrics := range s.TableClient { - for _, clientMetrics := range tableMetrics { - clientMetrics.mutex.Lock() - startTime := clientMetrics.startTime - endTime := clientMetrics.endTime - clientMetrics.mutex.Unlock() - if startTime.IsZero() && endTime.IsZero() { - queuedTables = append(queuedTables, table) - break - } - } - } - - slices.Sort(queuedTables) - return queuedTables -} diff --git a/plugins/source/metrics_test.go b/plugins/source/metrics_test.go deleted file mode 100644 index fb7488d47e..0000000000 --- a/plugins/source/metrics_test.go +++ /dev/null @@ -1,186 +0,0 @@ -package source - -import ( - "testing" - "time" - - "github.com/apache/arrow/go/v13/arrow" - "github.com/cloudquery/plugin-sdk/v3/schema" - "github.com/stretchr/testify/assert" -) - -func TestMetrics(t *testing.T) { - s := &Metrics{ - TableClient: make(map[string]map[string]*TableClientMetrics), - } - s.TableClient["test_table"] = make(map[string]*TableClientMetrics) - s.TableClient["test_table"]["testExecutionClient"] = &TableClientMetrics{ - Resources: 1, - Errors: 2, - Panics: 3, - } - if s.TotalResources() != 1 { - t.Fatal("expected 1 resource") - } - if s.TotalErrors() != 2 { - t.Fatal("expected 2 error") - } - if s.TotalPanics() != 3 { - t.Fatal("expected 3 panics") - } - - other := &Metrics{ - TableClient: make(map[string]map[string]*TableClientMetrics), - } - other.TableClient["test_table"] = make(map[string]*TableClientMetrics) - other.TableClient["test_table"]["testExecutionClient"] = &TableClientMetrics{ - Resources: 1, - Errors: 2, - Panics: 3, - } - if !s.Equal(other) { - t.Fatal("expected metrics to be equal") - } -} - -func TestInProgressTables(t *testing.T) { - s := &Metrics{ - TableClient: make(map[string]map[string]*TableClientMetrics), - } - s.TableClient["test_table_done"] = make(map[string]*TableClientMetrics) - s.TableClient["test_table_done"]["testExecutionClient"] = &TableClientMetrics{ - Resources: 1, - Errors: 2, - Panics: 3, - startTime: time.Now(), - endTime: time.Now().Add(time.Second), - } - - s.TableClient["test_table_running1"] = make(map[string]*TableClientMetrics) - s.TableClient["test_table_running1"]["testExecutionClient"] = &TableClientMetrics{ - Resources: 1, - Errors: 2, - Panics: 3, - startTime: time.Now(), - } - - s.TableClient["test_table_running2"] = make(map[string]*TableClientMetrics) - s.TableClient["test_table_running2"]["testExecutionClient"] = &TableClientMetrics{ - Resources: 1, - Errors: 2, - Panics: 3, - startTime: time.Now(), - } - s.TableClient["test_table_running3"] = make(map[string]*TableClientMetrics) - s.TableClient["test_table_running3"]["testExecutionClient"] = &TableClientMetrics{} - assert.ElementsMatch(t, []string{"test_table_running1", "test_table_running2"}, s.InProgressTables()) -} - -func TestQueuedTables(t *testing.T) { - s := &Metrics{ - TableClient: make(map[string]map[string]*TableClientMetrics), - } - s.TableClient["test_table_done"] = make(map[string]*TableClientMetrics) - s.TableClient["test_table_done"]["testExecutionClient"] = &TableClientMetrics{ - Resources: 1, - Errors: 2, - Panics: 3, - startTime: time.Now(), - endTime: time.Now().Add(time.Second), - } - - s.TableClient["test_table_running1"] = make(map[string]*TableClientMetrics) - s.TableClient["test_table_running1"]["testExecutionClient"] = &TableClientMetrics{ - Resources: 1, - Errors: 2, - Panics: 3, - startTime: time.Now(), - } - - s.TableClient["test_table_running2"] = make(map[string]*TableClientMetrics) - s.TableClient["test_table_running2"]["testExecutionClient"] = &TableClientMetrics{ - Resources: 1, - Errors: 2, - Panics: 3, - startTime: time.Now(), - } - s.TableClient["test_table_running3"] = make(map[string]*TableClientMetrics) - s.TableClient["test_table_running3"]["testExecutionClient"] = &TableClientMetrics{} - assert.ElementsMatch(t, []string{"test_table_running3"}, s.QueuedTables()) -} - -type MockClientMeta struct { -} - -func (*MockClientMeta) ID() string { - return "id" -} - -var exampleTableSchema = &schema.Table{ - Name: "toplevel", - Columns: schema.ColumnList{ - { - Name: "col1", - Type: &arrow.Int32Type{}, - }, - }, - Relations: []*schema.Table{ - { - Name: "child", - Columns: schema.ColumnList{ - { - Name: "col1", - Type: &arrow.Int32Type{}, - }, - }, - }, - }, -} - -// When a top-level table is marked as done, all child tables should be marked as done as well. -// For child-tables, only the specified table should be marked as done. -func TestMarkEndChildTableNotRecursive(t *testing.T) { - mockClientMeta := &MockClientMeta{} - - metrics := &Metrics{ - TableClient: make(map[string]map[string]*TableClientMetrics), - } - metrics.TableClient["toplevel"] = nil - metrics.TableClient["child"] = nil - - parentTable := exampleTableSchema - childTable := exampleTableSchema.Relations[0] - - metrics.initWithClients(parentTable, []schema.ClientMeta{mockClientMeta}) - metrics.MarkStart(parentTable, mockClientMeta.ID()) - metrics.MarkStart(childTable, mockClientMeta.ID()) - - assert.ElementsMatch(t, []string{"toplevel", "child"}, metrics.InProgressTables()) - - metrics.MarkEnd(childTable, mockClientMeta.ID()) - - assert.ElementsMatch(t, []string{"toplevel"}, metrics.InProgressTables()) -} - -func TestMarkEndTopLevelTableRecursive(t *testing.T) { - mockClientMeta := &MockClientMeta{} - - metrics := &Metrics{ - TableClient: make(map[string]map[string]*TableClientMetrics), - } - metrics.TableClient["toplevel"] = nil - metrics.TableClient["child"] = nil - - parentTable := exampleTableSchema - childTable := exampleTableSchema.Relations[0] - - metrics.initWithClients(parentTable, []schema.ClientMeta{mockClientMeta}) - metrics.MarkStart(parentTable, mockClientMeta.ID()) - metrics.MarkStart(childTable, mockClientMeta.ID()) - - assert.ElementsMatch(t, []string{"toplevel", "child"}, metrics.InProgressTables()) - - metrics.MarkEnd(parentTable, mockClientMeta.ID()) - - assert.Empty(t, metrics.InProgressTables()) -} diff --git a/plugins/source/options.go b/plugins/source/options.go deleted file mode 100644 index 72ddc5acc7..0000000000 --- a/plugins/source/options.go +++ /dev/null @@ -1,39 +0,0 @@ -package source - -import ( - "context" - - "github.com/cloudquery/plugin-sdk/v3/schema" -) - -type GetTables func(ctx context.Context, c schema.ClientMeta) (schema.Tables, error) - -type Option func(*Plugin) - -// WithDynamicTableOption allows the plugin to return list of tables after call to New -func WithDynamicTableOption(getDynamicTables GetTables) Option { - return func(p *Plugin) { - p.getDynamicTables = getDynamicTables - } -} - -// WithNoInternalColumns won't add internal columns (_cq_id, _cq_parent_cq_id) to the plugin tables -func WithNoInternalColumns() Option { - return func(p *Plugin) { - p.internalColumns = false - } -} - -func WithUnmanaged() Option { - return func(p *Plugin) { - p.unmanaged = true - } -} - -// WithTitleTransformer allows the plugin to control how table names get turned into titles for the -// generated documentation. -func WithTitleTransformer(t func(*schema.Table) string) Option { - return func(p *Plugin) { - p.titleTransformer = t - } -} diff --git a/plugins/source/plugin.go b/plugins/source/plugin.go deleted file mode 100644 index 5a0363af1e..0000000000 --- a/plugins/source/plugin.go +++ /dev/null @@ -1,345 +0,0 @@ -package source - -import ( - "context" - "fmt" - "sync" - "time" - - "github.com/cloudquery/plugin-pb-go/specs" - "github.com/cloudquery/plugin-sdk/v3/backend" - "github.com/cloudquery/plugin-sdk/v3/caser" - "github.com/cloudquery/plugin-sdk/v3/internal/backends/local" - "github.com/cloudquery/plugin-sdk/v3/internal/backends/nop" - "github.com/cloudquery/plugin-sdk/v3/schema" - "github.com/rs/zerolog" - "golang.org/x/sync/semaphore" -) - -type Options struct { - Backend backend.Backend -} - -type NewExecutionClientFunc func(context.Context, zerolog.Logger, specs.Source, Options) (schema.ClientMeta, error) - -type UnmanagedClient interface { - schema.ClientMeta - Sync(ctx context.Context, metrics *Metrics, res chan<- *schema.Resource) error -} - -// Plugin is the base structure required to pass to sdk.serve -// We take a declarative approach to API here similar to Cobra -type Plugin struct { - // Name of plugin i.e aws,gcp, azure etc' - name string - // Version of the plugin - version string - // Called upon configure call to validate and init configuration - newExecutionClient NewExecutionClientFunc - // dynamic table function if specified - getDynamicTables GetTables - // Tables is all tables supported by this source plugin - tables schema.Tables - // status sync metrics - metrics *Metrics - // Logger to call, this logger is passed to the serve.Serve Client, if not defined Serve will create one instead. - logger zerolog.Logger - // resourceSem is a semaphore that limits the number of concurrent resources being fetched - resourceSem *semaphore.Weighted - // tableSem is a semaphore that limits the number of concurrent tables being fetched - tableSems []*semaphore.Weighted - // maxDepth is the max depth of tables - maxDepth uint64 - // caser - caser *caser.Caser - // mu is a mutex that limits the number of concurrent init/syncs (can only be one at a time) - mu sync.Mutex - - // client is the initialized session client - client schema.ClientMeta - // sessionTables are the - sessionTables schema.Tables - // backend is the backend used to store the cursor state - backend backend.Backend - // spec is the spec the client was initialized with - spec specs.Source - // NoInternalColumns if set to true will not add internal columns to tables such as _cq_id and _cq_parent_id - // useful for sources such as PostgreSQL and other databases - internalColumns bool - // unmanaged if set to true then the plugin will call Sync directly and not use the scheduler - unmanaged bool - // titleTransformer allows the plugin to control how table names get turned into titles for generated documentation - titleTransformer func(*schema.Table) string - syncTime time.Time -} - -const ( - maxAllowedDepth = 4 -) - -// Add internal columns -func (p *Plugin) addInternalColumns(tables []*schema.Table) error { - for _, table := range tables { - if c := table.Column("_cq_id"); c != nil { - return fmt.Errorf("table %s already has column _cq_id", table.Name) - } - cqID := schema.CqIDColumn - if len(table.PrimaryKeys()) == 0 { - cqID.PrimaryKey = true - } - cqSourceName := schema.CqSourceNameColumn - cqSyncTime := schema.CqSyncTimeColumn - cqSourceName.Resolver = func(_ context.Context, _ schema.ClientMeta, resource *schema.Resource, c schema.Column) error { - return resource.Set(c.Name, p.spec.Name) - } - cqSyncTime.Resolver = func(_ context.Context, _ schema.ClientMeta, resource *schema.Resource, c schema.Column) error { - return resource.Set(c.Name, p.syncTime) - } - - table.Columns = append([]schema.Column{cqSourceName, cqSyncTime, cqID, schema.CqParentIDColumn}, table.Columns...) - if err := p.addInternalColumns(table.Relations); err != nil { - return err - } - } - return nil -} - -// Set parent links on relational tables -func setParents(tables schema.Tables, parent *schema.Table) { - for _, table := range tables { - table.Parent = parent - setParents(table.Relations, table) - } -} - -// Apply transformations to tables -func transformTables(tables schema.Tables) error { - for _, table := range tables { - if table.Transform != nil { - if err := table.Transform(table); err != nil { - return fmt.Errorf("failed to transform table %s: %w", table.Name, err) - } - } - if err := transformTables(table.Relations); err != nil { - return err - } - } - return nil -} - -func maxDepth(tables schema.Tables) uint64 { - var depth uint64 - if len(tables) == 0 { - return 0 - } - for _, table := range tables { - newDepth := 1 + maxDepth(table.Relations) - if newDepth > depth { - depth = newDepth - } - } - return depth -} - -// NewPlugin returns a new plugin with a given name, version, tables, newExecutionClient -// and additional options. -func NewPlugin(name string, version string, tables []*schema.Table, newExecutionClient NewExecutionClientFunc, options ...Option) *Plugin { - p := Plugin{ - name: name, - version: version, - tables: tables, - newExecutionClient: newExecutionClient, - metrics: &Metrics{TableClient: make(map[string]map[string]*TableClientMetrics)}, - caser: caser.New(), - titleTransformer: DefaultTitleTransformer, - internalColumns: true, - } - for _, opt := range options { - opt(&p) - } - setParents(p.tables, nil) - if err := transformTables(p.tables); err != nil { - panic(err) - } - if p.internalColumns { - if err := p.addInternalColumns(p.tables); err != nil { - panic(err) - } - } - if err := p.validate(); err != nil { - panic(err) - } - p.maxDepth = maxDepth(p.tables) - if p.maxDepth > maxAllowedDepth { - panic(fmt.Errorf("max depth of tables is %d, max allowed is %d", p.maxDepth, maxAllowedDepth)) - } - return &p -} - -func (p *Plugin) SetLogger(logger zerolog.Logger) { - p.logger = logger.With().Str("module", p.name+"-src").Logger() -} - -// Tables returns all tables supported by this source plugin -func (p *Plugin) Tables() schema.Tables { - return p.tables -} - -func (p *Plugin) HasDynamicTables() bool { - return p.getDynamicTables != nil -} - -func (p *Plugin) GetDynamicTables() schema.Tables { - return p.sessionTables -} - -// TablesForSpec returns all tables supported by this source plugin that match the given spec. -// It validates the tables part of the spec and will return an error if it is found to be invalid. -// This is deprecated method -func (p *Plugin) TablesForSpec(spec specs.Source) (schema.Tables, error) { - spec.SetDefaults() - if err := spec.Validate(); err != nil { - return nil, fmt.Errorf("invalid spec: %w", err) - } - tables, err := p.tables.FilterDfs(spec.Tables, spec.SkipTables, spec.SkipDependentTables) - if err != nil { - return nil, fmt.Errorf("failed to filter tables: %w", err) - } - return tables, nil -} - -// Name return the name of this plugin -func (p *Plugin) Name() string { - return p.name -} - -// Version returns the version of this plugin -func (p *Plugin) Version() string { - return p.version -} - -func (p *Plugin) Metrics() *Metrics { - return p.metrics -} - -func (p *Plugin) Init(ctx context.Context, spec specs.Source) error { - if !p.mu.TryLock() { - return fmt.Errorf("plugin already in use") - } - defer p.mu.Unlock() - - var err error - spec.SetDefaults() - if err := spec.Validate(); err != nil { - return fmt.Errorf("invalid spec: %w", err) - } - p.spec = spec - - switch spec.Backend { - case specs.BackendNone: - p.backend = nop.New() - case specs.BackendLocal: - p.backend, err = local.New(spec) - if err != nil { - return fmt.Errorf("failed to initialize local backend: %w", err) - } - default: - return fmt.Errorf("unknown backend: %s", spec.Backend) - } - - tables := p.tables - if p.getDynamicTables != nil { - p.client, err = p.newExecutionClient(ctx, p.logger, spec, Options{Backend: p.backend}) - if err != nil { - return fmt.Errorf("failed to create execution client for source plugin %s: %w", p.name, err) - } - tables, err = p.getDynamicTables(ctx, p.client) - if err != nil { - return fmt.Errorf("failed to get dynamic tables: %w", err) - } - - tables, err = tables.FilterDfs(spec.Tables, spec.SkipTables, spec.SkipDependentTables) - if err != nil { - return fmt.Errorf("failed to filter tables: %w", err) - } - if len(tables) == 0 { - return fmt.Errorf("no tables to sync - please check your spec 'tables' and 'skip_tables' settings") - } - - setParents(tables, nil) - if err := transformTables(tables); err != nil { - return err - } - if p.internalColumns { - if err := p.addInternalColumns(tables); err != nil { - return err - } - } - if err := p.validate(); err != nil { - return err - } - p.maxDepth = maxDepth(tables) - if p.maxDepth > maxAllowedDepth { - return fmt.Errorf("max depth of tables is %d, max allowed is %d", p.maxDepth, maxAllowedDepth) - } - } else { - tables, err = tables.FilterDfs(spec.Tables, spec.SkipTables, spec.SkipDependentTables) - if err != nil { - return fmt.Errorf("failed to filter tables: %w", err) - } - } - - p.sessionTables = tables - return nil -} - -// Sync is syncing data from the requested tables in spec to the given channel -func (p *Plugin) Sync(ctx context.Context, syncTime time.Time, res chan<- *schema.Resource) error { - if !p.mu.TryLock() { - return fmt.Errorf("plugin already in use") - } - defer p.mu.Unlock() - p.syncTime = syncTime - if p.client == nil { - var err error - p.client, err = p.newExecutionClient(ctx, p.logger, p.spec, Options{Backend: p.backend}) - if err != nil { - return fmt.Errorf("failed to create execution client for source plugin %s: %w", p.name, err) - } - } - - startTime := time.Now() - if p.unmanaged { - unmanagedClient := p.client.(UnmanagedClient) - if err := unmanagedClient.Sync(ctx, p.metrics, res); err != nil { - return fmt.Errorf("failed to sync unmanaged client: %w", err) - } - } else { - switch p.spec.Scheduler { - case specs.SchedulerDFS: - p.syncDfs(ctx, p.spec, p.client, p.sessionTables, res) - case specs.SchedulerRoundRobin: - p.syncRoundRobin(ctx, p.spec, p.client, p.sessionTables, res) - default: - return fmt.Errorf("unknown scheduler %s. Options are: %v", p.spec.Scheduler, specs.AllSchedulers.String()) - } - } - - p.logger.Info().Uint64("resources", p.metrics.TotalResources()).Uint64("errors", p.metrics.TotalErrors()).Uint64("panics", p.metrics.TotalPanics()).TimeDiff("duration", time.Now(), startTime).Msg("sync finished") - return nil -} - -func (p *Plugin) Close(ctx context.Context) error { - if !p.mu.TryLock() { - return fmt.Errorf("plugin already in use") - } - defer p.mu.Unlock() - if p.backend != nil { - err := p.backend.Close(ctx) - if err != nil { - return fmt.Errorf("failed to close backend: %w", err) - } - p.backend = nil - } - return nil -} diff --git a/plugins/source/scheduler.go b/plugins/source/scheduler.go deleted file mode 100644 index 1967f3cc1a..0000000000 --- a/plugins/source/scheduler.go +++ /dev/null @@ -1,177 +0,0 @@ -package source - -import ( - "context" - "errors" - "fmt" - "runtime/debug" - "sync" - "sync/atomic" - "time" - - "github.com/cloudquery/plugin-sdk/v3/schema" - "github.com/getsentry/sentry-go" - "github.com/rs/zerolog" - "github.com/thoas/go-funk" -) - -const ( - minTableConcurrency = 1 - minResourceConcurrency = 100 -) - -const periodicMetricLoggerInterval = 30 * time.Second -const periodicMetricLoggerLogTablesLimit = 30 // The max number of in_progress_tables to log in the periodic metric logger - -func (p *Plugin) logTablesMetrics(tables schema.Tables, client schema.ClientMeta) { - clientName := client.ID() - for _, table := range tables { - metrics := p.metrics.TableClient[table.Name][clientName] - p.logger.Info().Str("table", table.Name).Str("client", clientName).Uint64("resources", metrics.Resources).Uint64("errors", metrics.Errors).Msg("table sync finished") - p.logTablesMetrics(table.Relations, client) - } -} - -func (p *Plugin) resolveResource(ctx context.Context, table *schema.Table, client schema.ClientMeta, parent *schema.Resource, item any) *schema.Resource { - var validationErr *schema.ValidationError - ctx, cancel := context.WithTimeout(ctx, 10*time.Minute) - defer cancel() - resource := schema.NewResourceData(table, parent, item) - objectStartTime := time.Now() - clientID := client.ID() - tableMetrics := p.metrics.TableClient[table.Name][clientID] - logger := p.logger.With().Str("table", table.Name).Str("client", clientID).Logger() - defer func() { - if err := recover(); err != nil { - stack := fmt.Sprintf("%s\n%s", err, string(debug.Stack())) - logger.Error().Interface("error", err).TimeDiff("duration", time.Now(), objectStartTime).Str("stack", stack).Msg("resource resolver finished with panic") - atomic.AddUint64(&tableMetrics.Panics, 1) - sentry.WithScope(func(scope *sentry.Scope) { - scope.SetTag("table", table.Name) - sentry.CurrentHub().CaptureMessage(stack) - }) - } - }() - if table.PreResourceResolver != nil { - if err := table.PreResourceResolver(ctx, client, resource); err != nil { - logger.Error().Err(err).Msg("pre resource resolver failed") - atomic.AddUint64(&tableMetrics.Errors, 1) - if errors.As(err, &validationErr) { - sentry.WithScope(func(scope *sentry.Scope) { - scope.SetTag("table", table.Name) - sentry.CurrentHub().CaptureMessage(validationErr.MaskedError()) - }) - } - return nil - } - } - - for _, c := range table.Columns { - p.resolveColumn(ctx, logger, tableMetrics, client, resource, c) - } - - if table.PostResourceResolver != nil { - if err := table.PostResourceResolver(ctx, client, resource); err != nil { - logger.Error().Stack().Err(err).Msg("post resource resolver finished with error") - atomic.AddUint64(&tableMetrics.Errors, 1) - if errors.As(err, &validationErr) { - sentry.WithScope(func(scope *sentry.Scope) { - scope.SetTag("table", table.Name) - sentry.CurrentHub().CaptureMessage(validationErr.MaskedError()) - }) - } - } - } - atomic.AddUint64(&tableMetrics.Resources, 1) - return resource -} - -func (p *Plugin) resolveColumn(ctx context.Context, logger zerolog.Logger, tableMetrics *TableClientMetrics, client schema.ClientMeta, resource *schema.Resource, c schema.Column) { - var validationErr *schema.ValidationError - columnStartTime := time.Now() - defer func() { - if err := recover(); err != nil { - stack := fmt.Sprintf("%s\n%s", err, string(debug.Stack())) - logger.Error().Str("column", c.Name).Interface("error", err).TimeDiff("duration", time.Now(), columnStartTime).Str("stack", stack).Msg("column resolver finished with panic") - atomic.AddUint64(&tableMetrics.Panics, 1) - sentry.WithScope(func(scope *sentry.Scope) { - scope.SetTag("table", resource.Table.Name) - scope.SetTag("column", c.Name) - sentry.CurrentHub().CaptureMessage(stack) - }) - } - }() - - if c.Resolver != nil { - if err := c.Resolver(ctx, client, resource, c); err != nil { - logger.Error().Err(err).Msg("column resolver finished with error") - atomic.AddUint64(&tableMetrics.Errors, 1) - if errors.As(err, &validationErr) { - sentry.WithScope(func(scope *sentry.Scope) { - scope.SetTag("table", resource.Table.Name) - scope.SetTag("column", c.Name) - sentry.CurrentHub().CaptureMessage(validationErr.MaskedError()) - }) - } - } - } else { - // base use case: try to get column with CamelCase name - v := funk.Get(resource.GetItem(), p.caser.ToPascal(c.Name), funk.WithAllowZero()) - if v != nil { - err := resource.Set(c.Name, v) - if err != nil { - logger.Error().Err(err).Msg("column resolver finished with error") - atomic.AddUint64(&tableMetrics.Errors, 1) - if errors.As(err, &validationErr) { - sentry.WithScope(func(scope *sentry.Scope) { - scope.SetTag("table", resource.Table.Name) - scope.SetTag("column", c.Name) - sentry.CurrentHub().CaptureMessage(validationErr.MaskedError()) - }) - } - } - } - } -} - -func (p *Plugin) periodicMetricLogger(ctx context.Context, wg *sync.WaitGroup) { - defer wg.Done() - - ticker := time.NewTicker(periodicMetricLoggerInterval) - defer ticker.Stop() - - for { - select { - case <-ctx.Done(): - return - case <-ticker.C: - inProgressTables := p.metrics.InProgressTables() - queuedTables := p.metrics.QueuedTables() - logLine := p.logger.Info(). - Uint64("total_resources", p.metrics.TotalResourcesAtomic()). - Uint64("total_errors", p.metrics.TotalErrorsAtomic()). - Uint64("total_panics", p.metrics.TotalPanicsAtomic()). - Int("num_in_progress_tables", len(inProgressTables)). - Int("num_queued_tables", len(queuedTables)) - - if len(inProgressTables) <= periodicMetricLoggerLogTablesLimit { - logLine.Strs("in_progress_tables", inProgressTables) - } - - if len(queuedTables) <= periodicMetricLoggerLogTablesLimit { - logLine.Strs("queued_tables", queuedTables) - } - - logLine.Msg("Sync in progress") - } - } -} - -// unparam's suggestion to remove the second parameter is not good advice here. -// nolint:unparam -func max(a, b uint64) uint64 { - if a > b { - return a - } - return b -} diff --git a/plugins/source/scheduler_dfs.go b/plugins/source/scheduler_dfs.go deleted file mode 100644 index 1cd5142624..0000000000 --- a/plugins/source/scheduler_dfs.go +++ /dev/null @@ -1,234 +0,0 @@ -package source - -import ( - "context" - "errors" - "fmt" - "runtime/debug" - "sync" - "sync/atomic" - - "github.com/cloudquery/plugin-pb-go/specs" - "github.com/cloudquery/plugin-sdk/v3/helpers" - "github.com/cloudquery/plugin-sdk/v3/schema" - "github.com/getsentry/sentry-go" - "golang.org/x/sync/semaphore" -) - -func (p *Plugin) syncDfs(ctx context.Context, spec specs.Source, client schema.ClientMeta, tables schema.Tables, resolvedResources chan<- *schema.Resource) { - // This is very similar to the concurrent web crawler problem with some minor changes. - // We are using DFS to make sure memory usage is capped at O(h) where h is the height of the tree. - tableConcurrency := max(spec.Concurrency/minResourceConcurrency, minTableConcurrency) - resourceConcurrency := tableConcurrency * minResourceConcurrency - - p.tableSems = make([]*semaphore.Weighted, p.maxDepth) - for i := uint64(0); i < p.maxDepth; i++ { - p.tableSems[i] = semaphore.NewWeighted(int64(tableConcurrency)) - // reduce table concurrency logarithmically for every depth level - tableConcurrency = max(tableConcurrency/2, minTableConcurrency) - } - p.resourceSem = semaphore.NewWeighted(int64(resourceConcurrency)) - - // we have this because plugins can return sometimes clients in a random way which will cause - // differences between this run and the next one. - preInitialisedClients := make([][]schema.ClientMeta, len(tables)) - for i, table := range tables { - clients := []schema.ClientMeta{client} - if table.Multiplex != nil { - clients = table.Multiplex(client) - } - // Detect duplicate clients while multiplexing - seenClients := make(map[string]bool) - for _, c := range clients { - if _, ok := seenClients[c.ID()]; !ok { - seenClients[c.ID()] = true - } else { - sentry.WithScope(func(scope *sentry.Scope) { - scope.SetTag("table", table.Name) - sentry.CurrentHub().CaptureMessage("duplicate client ID in " + table.Name) - }) - p.logger.Warn().Str("client", c.ID()).Str("table", table.Name).Msg("multiplex returned duplicate client") - } - } - preInitialisedClients[i] = clients - // we do this here to avoid locks so we initial the metrics structure once in the main goroutines - // and then we can just read from it in the other goroutines concurrently given we are not writing to it. - p.metrics.initWithClients(table, clients) - } - - // We start a goroutine that logs the metrics periodically. - // It needs its own waitgroup - var logWg sync.WaitGroup - logWg.Add(1) - - logCtx, logCancel := context.WithCancel(ctx) - go p.periodicMetricLogger(logCtx, &logWg) - - var wg sync.WaitGroup - for i, table := range tables { - table := table - clients := preInitialisedClients[i] - for _, client := range clients { - client := client - if err := p.tableSems[0].Acquire(ctx, 1); err != nil { - // This means context was cancelled - wg.Wait() - // gracefully shut down the logger goroutine - logCancel() - logWg.Wait() - return - } - wg.Add(1) - go func() { - defer wg.Done() - defer p.tableSems[0].Release(1) - // not checking for error here as nothing much todo. - // the error is logged and this happens when context is cancelled - p.resolveTableDfs(ctx, table, client, nil, resolvedResources, 1) - }() - } - } - - // Wait for all the worker goroutines to finish - wg.Wait() - - // gracefully shut down the logger goroutine - logCancel() - logWg.Wait() -} - -func (p *Plugin) resolveTableDfs(ctx context.Context, table *schema.Table, client schema.ClientMeta, parent *schema.Resource, resolvedResources chan<- *schema.Resource, depth int) { - clientName := client.ID() - - p.metrics.MarkStart(table, clientName) - defer p.Metrics().MarkEnd(table, clientName) - - var validationErr *schema.ValidationError - logger := p.logger.With().Str("table", table.Name).Str("client", clientName).Logger() - - if parent == nil { // Log only for root tables, otherwise we spam too much. - logger.Info().Msg("top level table resolver started") - } - tableMetrics := p.metrics.TableClient[table.Name][clientName] - - res := make(chan any) - go func() { - defer func() { - if err := recover(); err != nil { - stack := fmt.Sprintf("%s\n%s", err, string(debug.Stack())) - sentry.WithScope(func(scope *sentry.Scope) { - scope.SetTag("table", table.Name) - sentry.CurrentHub().CaptureMessage(stack) - }) - logger.Error().Interface("error", err).Str("stack", stack).Msg("table resolver finished with panic") - atomic.AddUint64(&tableMetrics.Panics, 1) - } - close(res) - }() - if err := table.Resolver(ctx, client, parent, res); err != nil { - logger.Error().Err(err).Msg("table resolver finished with error") - atomic.AddUint64(&tableMetrics.Errors, 1) - if errors.As(err, &validationErr) { - sentry.WithScope(func(scope *sentry.Scope) { - scope.SetTag("table", table.Name) - sentry.CurrentHub().CaptureMessage(validationErr.MaskedError()) - }) - } - return - } - }() - - for r := range res { - p.resolveResourcesDfs(ctx, table, client, parent, r, resolvedResources, depth) - } - - // we don't need any waitgroups here because we are waiting for the channel to close - if parent == nil { // Log only for root tables and relations only after resolving is done, otherwise we spam per object instead of per table. - logger.Info().Uint64("resources", tableMetrics.Resources).Uint64("errors", tableMetrics.Errors).Msg("table sync finished") - p.logTablesMetrics(table.Relations, client) - } -} - -func (p *Plugin) resolveResourcesDfs(ctx context.Context, table *schema.Table, client schema.ClientMeta, parent *schema.Resource, resources any, resolvedResources chan<- *schema.Resource, depth int) { - resourcesSlice := helpers.InterfaceSlice(resources) - if len(resourcesSlice) == 0 { - return - } - resourcesChan := make(chan *schema.Resource, len(resourcesSlice)) - go func() { - defer close(resourcesChan) - var wg sync.WaitGroup - sentValidationErrors := sync.Map{} - for i := range resourcesSlice { - i := i - if err := p.resourceSem.Acquire(ctx, 1); err != nil { - p.logger.Warn().Err(err).Msg("failed to acquire semaphore. context cancelled") - wg.Wait() - // we have to continue emptying the channel to exit gracefully - return - } - wg.Add(1) - go func() { - defer p.resourceSem.Release(1) - defer wg.Done() - //nolint:all - resolvedResource := p.resolveResource(ctx, table, client, parent, resourcesSlice[i]) - if resolvedResource == nil { - return - } - - if err := resolvedResource.CalculateCQID(p.spec.DeterministicCQID); err != nil { - tableMetrics := p.metrics.TableClient[table.Name][client.ID()] - p.logger.Error().Err(err).Str("table", table.Name).Str("client", client.ID()).Msg("resource resolver finished with primary key calculation error") - if _, found := sentValidationErrors.LoadOrStore(table.Name, struct{}{}); !found { - // send resource validation errors to Sentry only once per table, - // to avoid sending too many duplicate messages - sentry.WithScope(func(scope *sentry.Scope) { - scope.SetTag("table", table.Name) - sentry.CurrentHub().CaptureMessage(err.Error()) - }) - } - atomic.AddUint64(&tableMetrics.Errors, 1) - return - } - if err := resolvedResource.Validate(); err != nil { - tableMetrics := p.metrics.TableClient[table.Name][client.ID()] - p.logger.Error().Err(err).Str("table", table.Name).Str("client", client.ID()).Msg("resource resolver finished with validation error") - if _, found := sentValidationErrors.LoadOrStore(table.Name, struct{}{}); !found { - // send resource validation errors to Sentry only once per table, - // to avoid sending too many duplicate messages - sentry.WithScope(func(scope *sentry.Scope) { - scope.SetTag("table", table.Name) - sentry.CurrentHub().CaptureMessage(err.Error()) - }) - } - atomic.AddUint64(&tableMetrics.Errors, 1) - return - } - resourcesChan <- resolvedResource - }() - } - wg.Wait() - }() - - var wg sync.WaitGroup - for resource := range resourcesChan { - resource := resource - resolvedResources <- resource - for _, relation := range resource.Table.Relations { - relation := relation - if err := p.tableSems[depth].Acquire(ctx, 1); err != nil { - // This means context was cancelled - wg.Wait() - return - } - wg.Add(1) - go func() { - defer wg.Done() - defer p.tableSems[depth].Release(1) - p.resolveTableDfs(ctx, relation, client, resource, resolvedResources, depth+1) - }() - } - } - wg.Wait() -} diff --git a/plugins/source/scheduler_round_robin.go b/plugins/source/scheduler_round_robin.go deleted file mode 100644 index 00b1030f68..0000000000 --- a/plugins/source/scheduler_round_robin.go +++ /dev/null @@ -1,104 +0,0 @@ -package source - -import ( - "context" - "sync" - - "github.com/cloudquery/plugin-pb-go/specs" - "github.com/cloudquery/plugin-sdk/v3/schema" - "golang.org/x/sync/semaphore" -) - -type tableClient struct { - table *schema.Table - client schema.ClientMeta -} - -func (p *Plugin) syncRoundRobin(ctx context.Context, spec specs.Source, client schema.ClientMeta, tables schema.Tables, resolvedResources chan<- *schema.Resource) { - tableConcurrency := max(spec.Concurrency/minResourceConcurrency, minTableConcurrency) - resourceConcurrency := tableConcurrency * minResourceConcurrency - - p.tableSems = make([]*semaphore.Weighted, p.maxDepth) - for i := uint64(0); i < p.maxDepth; i++ { - p.tableSems[i] = semaphore.NewWeighted(int64(tableConcurrency)) - // reduce table concurrency logarithmically for every depth level - tableConcurrency = max(tableConcurrency/2, minTableConcurrency) - } - p.resourceSem = semaphore.NewWeighted(int64(resourceConcurrency)) - - // we have this because plugins can return sometimes clients in a random way which will cause - // differences between this run and the next one. - preInitialisedClients := make([][]schema.ClientMeta, len(tables)) - for i, table := range tables { - clients := []schema.ClientMeta{client} - if table.Multiplex != nil { - clients = table.Multiplex(client) - } - preInitialisedClients[i] = clients - // we do this here to avoid locks so we initial the metrics structure once in the main goroutines - // and then we can just read from it in the other goroutines concurrently given we are not writing to it. - p.metrics.initWithClients(table, clients) - } - - // We start a goroutine that logs the metrics periodically. - // It needs its own waitgroup - var logWg sync.WaitGroup - logWg.Add(1) - - logCtx, logCancel := context.WithCancel(ctx) - go p.periodicMetricLogger(logCtx, &logWg) - - tableClients := roundRobinInterleave(tables, preInitialisedClients) - - var wg sync.WaitGroup - for _, tc := range tableClients { - table := tc.table - cl := tc.client - if err := p.tableSems[0].Acquire(ctx, 1); err != nil { - // This means context was cancelled - wg.Wait() - // gracefully shut down the logger goroutine - logCancel() - logWg.Wait() - return - } - wg.Add(1) - go func() { - defer wg.Done() - defer p.tableSems[0].Release(1) - // not checking for error here as nothing much to do. - // the error is logged and this happens when context is cancelled - // Round Robin currently uses the DFS algorithm to resolve the tables, but this - // may change in the future. - p.resolveTableDfs(ctx, table, cl, nil, resolvedResources, 1) - }() - } - - // Wait for all the worker goroutines to finish - wg.Wait() - - // gracefully shut down the logger goroutine - logCancel() - logWg.Wait() -} - -// interleave table-clients so that we get: -// table1-client1, table2-client1, table3-client1, table1-client2, table2-client2, table3-client2, ... -func roundRobinInterleave(tables schema.Tables, preInitialisedClients [][]schema.ClientMeta) []tableClient { - tableClients := make([]tableClient, 0) - c := 0 - for { - addedNew := false - for i, table := range tables { - if c < len(preInitialisedClients[i]) { - tableClients = append(tableClients, tableClient{table: table, client: preInitialisedClients[i][c]}) - addedNew = true - } - } - c++ - if !addedNew { - break - } - } - return tableClients -} diff --git a/plugins/source/scheduler_round_robin_test.go b/plugins/source/scheduler_round_robin_test.go deleted file mode 100644 index 8f7e3425f5..0000000000 --- a/plugins/source/scheduler_round_robin_test.go +++ /dev/null @@ -1,65 +0,0 @@ -package source - -import ( - "testing" - - "github.com/cloudquery/plugin-sdk/v3/schema" -) - -func TestRoundRobinInterleave(t *testing.T) { - table1 := &schema.Table{Name: "test_table"} - table2 := &schema.Table{Name: "test_table2"} - client1 := &testExecutionClient{} - client2 := &testExecutionClient{} - client3 := &testExecutionClient{} - cases := []struct { - name string - tables schema.Tables - preInitialisedClients [][]schema.ClientMeta - want []tableClient - }{ - { - name: "single table", - tables: schema.Tables{table1}, - preInitialisedClients: [][]schema.ClientMeta{{client1}}, - want: []tableClient{{table: table1, client: client1}}, - }, - { - name: "two tables with different clients", - tables: schema.Tables{table1, table2}, - preInitialisedClients: [][]schema.ClientMeta{{client1}, {client1, client2}}, - want: []tableClient{ - {table: table1, client: client1}, - {table: table2, client: client1}, - {table: table2, client: client2}, - }, - }, - { - name: "two tables with different clients", - tables: schema.Tables{table1, table2}, - preInitialisedClients: [][]schema.ClientMeta{{client1, client3}, {client1, client2}}, - want: []tableClient{ - {table: table1, client: client1}, - {table: table2, client: client1}, - {table: table1, client: client3}, - {table: table2, client: client2}, - }, - }, - } - for _, tc := range cases { - t.Run(tc.name, func(t *testing.T) { - got := roundRobinInterleave(tc.tables, tc.preInitialisedClients) - if len(got) != len(tc.want) { - t.Fatalf("got %d tableClients, want %d", len(got), len(tc.want)) - } - for i := range got { - if got[i].table != tc.want[i].table { - t.Errorf("got table %v, want %v", got[i].table, tc.want[i].table) - } - if got[i].client != tc.want[i].client { - t.Errorf("got client %v, want %v", got[i].client, tc.want[i].client) - } - } - }) - } -} diff --git a/plugins/source/templates/all_tables.md.go.tpl b/plugins/source/templates/all_tables.md.go.tpl deleted file mode 100644 index 008afb66fd..0000000000 --- a/plugins/source/templates/all_tables.md.go.tpl +++ /dev/null @@ -1,5 +0,0 @@ -# Source Plugin: {{.PluginName}} -## Tables -{{- range $table := $.Tables }} -{{- template "all_tables_entry.md.go.tpl" $table}} -{{- end }} \ No newline at end of file diff --git a/plugins/source/templates/all_tables_entry.md.go.tpl b/plugins/source/templates/all_tables_entry.md.go.tpl deleted file mode 100644 index 6166b1983b..0000000000 --- a/plugins/source/templates/all_tables_entry.md.go.tpl +++ /dev/null @@ -1,5 +0,0 @@ - -{{. | indentToDepth}}- [{{.Name}}]({{.Name}}.md){{ if .IsIncremental}} (Incremental){{ end }} -{{- range $index, $rel := .Relations}} -{{- template "all_tables_entry.md.go.tpl" $rel}} -{{- end}} \ No newline at end of file diff --git a/plugins/source/templates/table.md.go.tpl b/plugins/source/templates/table.md.go.tpl deleted file mode 100644 index 202d343e39..0000000000 --- a/plugins/source/templates/table.md.go.tpl +++ /dev/null @@ -1,44 +0,0 @@ -# Table: {{$.Name}} - -This table shows data for {{.|title}}. - -{{ $.Description }} -{{ $length := len $.PrimaryKeys -}} -{{ if eq $length 1 }} -The primary key for this table is **{{ index $.PrimaryKeys 0 }}**. -{{ else }} -The composite primary key for this table is ({{ range $index, $pk := $.PrimaryKeys -}} - {{if $index }}, {{end -}} - **{{$pk}}** - {{- end -}}). -{{ end }} -{{- if $.IsIncremental -}} -It supports incremental syncs -{{- $ikLength := len $.IncrementalKeys -}} -{{- if eq $ikLength 1 }} based on the **{{ index $.IncrementalKeys 0 }}** column -{{- else if gt $ikLength 1 }} based on the ({{ range $index, $pk := $.IncrementalKeys -}} - {{- if $index -}}, {{end -}} - **{{$pk}}** - {{- end -}}) columns -{{- end -}}. -{{- end -}} - -{{- if or ($.Relations) ($.Parent) }} -## Relations -{{- end }} -{{- if $.Parent }} -This table depends on [{{ $.Parent.Name }}]({{ $.Parent.Name }}.md). -{{- end}} -{{ if $.Relations }} -The following tables depend on {{.Name}}: -{{- range $rel := $.Relations }} - - [{{ $rel.Name }}]({{ $rel.Name }}.md) -{{- end }} -{{- end }} - -## Columns -| Name | Type | -| ------------- | ------------- | -{{- range $column := $.Columns }} -|{{$column.Name}}{{if $column.PrimaryKey}} (PK){{end}}{{if $column.IncrementalKey}} (Incremental Key){{end}}|`{{$column.Type}}`| -{{- end }} \ No newline at end of file diff --git a/plugins/source/testdata/TestGeneratePluginDocs-JSON-__tables.json b/plugins/source/testdata/TestGeneratePluginDocs-JSON-__tables.json deleted file mode 100644 index 7a8280833e..0000000000 --- a/plugins/source/testdata/TestGeneratePluginDocs-JSON-__tables.json +++ /dev/null @@ -1,214 +0,0 @@ -[ - { - "name": "incremental_table", - "title": "Incremental Table", - "description": "Description for incremental table", - "columns": [ - { - "name": "_cq_source_name", - "type": "utf8" - }, - { - "name": "_cq_sync_time", - "type": "timestamp[us, tz=UTC]" - }, - { - "name": "_cq_id", - "type": "uuid" - }, - { - "name": "_cq_parent_id", - "type": "uuid" - }, - { - "name": "int_col", - "type": "int64" - }, - { - "name": "id_col", - "type": "int64", - "is_primary_key": true, - "is_incremental_key": true - }, - { - "name": "id_col2", - "type": "int64", - "is_incremental_key": true - } - ], - "relations": [] - }, - { - "name": "test_table", - "title": "Test Table", - "description": "Description for test table", - "columns": [ - { - "name": "_cq_source_name", - "type": "utf8" - }, - { - "name": "_cq_sync_time", - "type": "timestamp[us, tz=UTC]" - }, - { - "name": "_cq_id", - "type": "uuid" - }, - { - "name": "_cq_parent_id", - "type": "uuid" - }, - { - "name": "int_col", - "type": "int64" - }, - { - "name": "id_col", - "type": "int64", - "is_primary_key": true - }, - { - "name": "id_col2", - "type": "int64", - "is_primary_key": true - }, - { - "name": "json_col", - "type": "json" - }, - { - "name": "list_col", - "type": "list" - }, - { - "name": "map_col", - "type": "map" - }, - { - "name": "struct_col", - "type": "struct" - } - ], - "relations": [ - { - "name": "relation_table", - "title": "Relation Table", - "description": "Description for relational table", - "columns": [ - { - "name": "_cq_source_name", - "type": "utf8" - }, - { - "name": "_cq_sync_time", - "type": "timestamp[us, tz=UTC]" - }, - { - "name": "_cq_id", - "type": "uuid", - "is_primary_key": true - }, - { - "name": "_cq_parent_id", - "type": "uuid" - }, - { - "name": "string_col", - "type": "utf8" - } - ], - "relations": [ - { - "name": "relation_relation_table_a", - "title": "Relation Relation Table A", - "description": "Description for relational table's relation", - "columns": [ - { - "name": "_cq_source_name", - "type": "utf8" - }, - { - "name": "_cq_sync_time", - "type": "timestamp[us, tz=UTC]" - }, - { - "name": "_cq_id", - "type": "uuid", - "is_primary_key": true - }, - { - "name": "_cq_parent_id", - "type": "uuid" - }, - { - "name": "string_col", - "type": "utf8" - } - ], - "relations": [] - }, - { - "name": "relation_relation_table_b", - "title": "Relation Relation Table B", - "description": "Description for relational table's relation", - "columns": [ - { - "name": "_cq_source_name", - "type": "utf8" - }, - { - "name": "_cq_sync_time", - "type": "timestamp[us, tz=UTC]" - }, - { - "name": "_cq_id", - "type": "uuid", - "is_primary_key": true - }, - { - "name": "_cq_parent_id", - "type": "uuid" - }, - { - "name": "string_col", - "type": "utf8" - } - ], - "relations": [] - } - ] - }, - { - "name": "relation_table2", - "title": "Relation Table2", - "description": "Description for second relational table", - "columns": [ - { - "name": "_cq_source_name", - "type": "utf8" - }, - { - "name": "_cq_sync_time", - "type": "timestamp[us, tz=UTC]" - }, - { - "name": "_cq_id", - "type": "uuid", - "is_primary_key": true - }, - { - "name": "_cq_parent_id", - "type": "uuid" - }, - { - "name": "string_col", - "type": "utf8" - } - ], - "relations": [] - } - ] - } -] - diff --git a/plugins/source/testdata/TestGeneratePluginDocs-Markdown-README.md b/plugins/source/testdata/TestGeneratePluginDocs-Markdown-README.md deleted file mode 100644 index 9480a0598a..0000000000 --- a/plugins/source/testdata/TestGeneratePluginDocs-Markdown-README.md +++ /dev/null @@ -1,10 +0,0 @@ -# Source Plugin: test - -## Tables - -- [incremental_table](incremental_table.md) (Incremental) -- [test_table](test_table.md) - - [relation_table](relation_table.md) - - [relation_relation_table_a](relation_relation_table_a.md) - - [relation_relation_table_b](relation_relation_table_b.md) - - [relation_table2](relation_table2.md) diff --git a/plugins/source/testdata/TestGeneratePluginDocs-Markdown-incremental_table.md b/plugins/source/testdata/TestGeneratePluginDocs-Markdown-incremental_table.md deleted file mode 100644 index d0b1530577..0000000000 --- a/plugins/source/testdata/TestGeneratePluginDocs-Markdown-incremental_table.md +++ /dev/null @@ -1,20 +0,0 @@ -# Table: incremental_table - -This table shows data for Incremental Table. - -Description for incremental table - -The primary key for this table is **id_col**. -It supports incremental syncs based on the (**id_col**, **id_col2**) columns. - -## Columns - -| Name | Type | -| ------------- | ------------- | -|_cq_source_name|`utf8`| -|_cq_sync_time|`timestamp[us, tz=UTC]`| -|_cq_id|`uuid`| -|_cq_parent_id|`uuid`| -|int_col|`int64`| -|id_col (PK) (Incremental Key)|`int64`| -|id_col2 (Incremental Key)|`int64`| diff --git a/plugins/source/testdata/TestGeneratePluginDocs-Markdown-relation_relation_table_a.md b/plugins/source/testdata/TestGeneratePluginDocs-Markdown-relation_relation_table_a.md deleted file mode 100644 index 9ee22d1ba1..0000000000 --- a/plugins/source/testdata/TestGeneratePluginDocs-Markdown-relation_relation_table_a.md +++ /dev/null @@ -1,21 +0,0 @@ -# Table: relation_relation_table_a - -This table shows data for Relation Relation Table A. - -Description for relational table's relation - -The primary key for this table is **_cq_id**. - -## Relations - -This table depends on [relation_table](relation_table.md). - -## Columns - -| Name | Type | -| ------------- | ------------- | -|_cq_source_name|`utf8`| -|_cq_sync_time|`timestamp[us, tz=UTC]`| -|_cq_id (PK)|`uuid`| -|_cq_parent_id|`uuid`| -|string_col|`utf8`| diff --git a/plugins/source/testdata/TestGeneratePluginDocs-Markdown-relation_relation_table_b.md b/plugins/source/testdata/TestGeneratePluginDocs-Markdown-relation_relation_table_b.md deleted file mode 100644 index f6d68a71e1..0000000000 --- a/plugins/source/testdata/TestGeneratePluginDocs-Markdown-relation_relation_table_b.md +++ /dev/null @@ -1,21 +0,0 @@ -# Table: relation_relation_table_b - -This table shows data for Relation Relation Table B. - -Description for relational table's relation - -The primary key for this table is **_cq_id**. - -## Relations - -This table depends on [relation_table](relation_table.md). - -## Columns - -| Name | Type | -| ------------- | ------------- | -|_cq_source_name|`utf8`| -|_cq_sync_time|`timestamp[us, tz=UTC]`| -|_cq_id (PK)|`uuid`| -|_cq_parent_id|`uuid`| -|string_col|`utf8`| diff --git a/plugins/source/testdata/TestGeneratePluginDocs-Markdown-relation_table.md b/plugins/source/testdata/TestGeneratePluginDocs-Markdown-relation_table.md deleted file mode 100644 index 95c4125aa7..0000000000 --- a/plugins/source/testdata/TestGeneratePluginDocs-Markdown-relation_table.md +++ /dev/null @@ -1,25 +0,0 @@ -# Table: relation_table - -This table shows data for Relation Table. - -Description for relational table - -The primary key for this table is **_cq_id**. - -## Relations - -This table depends on [test_table](test_table.md). - -The following tables depend on relation_table: - - [relation_relation_table_a](relation_relation_table_a.md) - - [relation_relation_table_b](relation_relation_table_b.md) - -## Columns - -| Name | Type | -| ------------- | ------------- | -|_cq_source_name|`utf8`| -|_cq_sync_time|`timestamp[us, tz=UTC]`| -|_cq_id (PK)|`uuid`| -|_cq_parent_id|`uuid`| -|string_col|`utf8`| diff --git a/plugins/source/testdata/TestGeneratePluginDocs-Markdown-test_table.md b/plugins/source/testdata/TestGeneratePluginDocs-Markdown-test_table.md deleted file mode 100644 index cdd1df3317..0000000000 --- a/plugins/source/testdata/TestGeneratePluginDocs-Markdown-test_table.md +++ /dev/null @@ -1,29 +0,0 @@ -# Table: test_table - -This table shows data for Test Table. - -Description for test table - -The composite primary key for this table is (**id_col**, **id_col2**). - -## Relations - -The following tables depend on test_table: - - [relation_table](relation_table.md) - - [relation_table2](relation_table2.md) - -## Columns - -| Name | Type | -| ------------- | ------------- | -|_cq_source_name|`utf8`| -|_cq_sync_time|`timestamp[us, tz=UTC]`| -|_cq_id|`uuid`| -|_cq_parent_id|`uuid`| -|int_col|`int64`| -|id_col (PK)|`int64`| -|id_col2 (PK)|`int64`| -|json_col|`json`| -|list_col|`list`| -|map_col|`map`| -|struct_col|`struct`| diff --git a/plugins/source/testing.go b/plugins/source/testing.go deleted file mode 100644 index 161778bca9..0000000000 --- a/plugins/source/testing.go +++ /dev/null @@ -1,141 +0,0 @@ -package source - -import ( - "context" - "testing" - "time" - - "github.com/cloudquery/plugin-pb-go/specs" - "github.com/cloudquery/plugin-sdk/v3/schema" -) - -type Validator func(t *testing.T, plugin *Plugin, resources []*schema.Resource) - -func TestPluginSync(t *testing.T, plugin *Plugin, spec specs.Source, opts ...TestPluginOption) { - t.Helper() - - o := &testPluginOptions{ - parallel: true, - validators: []Validator{validatePlugin}, - } - for _, opt := range opts { - opt(o) - } - if o.parallel { - t.Parallel() - } - - resourcesChannel := make(chan *schema.Resource) - var syncErr error - - if err := plugin.Init(context.Background(), spec); err != nil { - t.Fatal(err) - } - - go func() { - defer close(resourcesChannel) - syncErr = plugin.Sync(context.Background(), time.Now(), resourcesChannel) - }() - - syncedResources := make([]*schema.Resource, 0) - for resource := range resourcesChannel { - syncedResources = append(syncedResources, resource) - } - if syncErr != nil { - t.Fatal(syncErr) - } - for _, validator := range o.validators { - validator(t, plugin, syncedResources) - } -} - -type TestPluginOption func(*testPluginOptions) - -func WithTestPluginNoParallel() TestPluginOption { - return func(f *testPluginOptions) { - f.parallel = false - } -} - -func WithTestPluginAdditionalValidators(v Validator) TestPluginOption { - return func(f *testPluginOptions) { - f.validators = append(f.validators, v) - } -} - -type testPluginOptions struct { - parallel bool - validators []Validator -} - -func getTableResources(t *testing.T, table *schema.Table, resources []*schema.Resource) []*schema.Resource { - t.Helper() - - tableResources := make([]*schema.Resource, 0) - - for _, resource := range resources { - if resource.Table.Name == table.Name { - tableResources = append(tableResources, resource) - } - } - - return tableResources -} - -func validateTable(t *testing.T, table *schema.Table, resources []*schema.Resource) { - t.Helper() - tableResources := getTableResources(t, table, resources) - if len(tableResources) == 0 { - t.Errorf("Expected table %s to be synced but it was not found", table.Name) - return - } - validateResources(t, tableResources) -} - -func validatePlugin(t *testing.T, plugin *Plugin, resources []*schema.Resource) { - t.Helper() - tables := extractTables(plugin.tables) - for _, table := range tables { - validateTable(t, table, resources) - } -} - -func extractTables(tables schema.Tables) []*schema.Table { - result := make([]*schema.Table, 0) - for _, table := range tables { - result = append(result, table) - result = append(result, extractTables(table.Relations)...) - } - return result -} - -// Validates that every column has at least one non-nil value. -// Also does some additional validations. -func validateResources(t *testing.T, resources []*schema.Resource) { - t.Helper() - - table := resources[0].Table - - // A set of column-names that have values in at least one of the resources. - columnsWithValues := make([]bool, len(table.Columns)) - - for _, resource := range resources { - for i, value := range resource.GetValues() { - if value == nil { - continue - } - if value.IsValid() { - columnsWithValues[i] = true - } - } - } - - // Make sure every column has at least one value. - for i, hasValue := range columnsWithValues { - col := table.Columns[i] - emptyExpected := col.Name == "_cq_parent_id" && table.Parent == nil - if !hasValue && !emptyExpected && !col.IgnoreInTests { - t.Errorf("table: %s column %s has no values", table.Name, table.Columns[i].Name) - } - } -} diff --git a/plugins/source/validate.go b/plugins/source/validate.go deleted file mode 100644 index 835b798c7e..0000000000 --- a/plugins/source/validate.go +++ /dev/null @@ -1,25 +0,0 @@ -package source - -import ( - "fmt" -) - -func (p *Plugin) validate() error { - if err := p.tables.ValidateDuplicateColumns(); err != nil { - return fmt.Errorf("found duplicate columns in source plugin: %s: %w", p.name, err) - } - - if err := p.tables.ValidateDuplicateTables(); err != nil { - return fmt.Errorf("found duplicate tables in source plugin: %s: %w", p.name, err) - } - - if err := p.tables.ValidateTableNames(); err != nil { - return fmt.Errorf("found table with invalid name in source plugin: %s: %w", p.name, err) - } - - if err := p.tables.ValidateColumnNames(); err != nil { - return fmt.Errorf("found column with invalid name in source plugin: %s: %w", p.name, err) - } - - return nil -} diff --git a/scalar/inet.go b/scalar/inet.go index f693a479e0..3d6163cfc7 100644 --- a/scalar/inet.go +++ b/scalar/inet.go @@ -7,7 +7,7 @@ import ( "strings" "github.com/apache/arrow/go/v13/arrow" - "github.com/cloudquery/plugin-sdk/v3/types" + "github.com/cloudquery/plugin-sdk/v4/types" ) type Inet struct { diff --git a/scalar/json.go b/scalar/json.go index ed6761351b..c0c5fceea3 100644 --- a/scalar/json.go +++ b/scalar/json.go @@ -6,7 +6,7 @@ import ( "reflect" "github.com/apache/arrow/go/v13/arrow" - "github.com/cloudquery/plugin-sdk/v3/types" + "github.com/cloudquery/plugin-sdk/v4/types" ) type JSON struct { diff --git a/scalar/mac.go b/scalar/mac.go index cef4ac27f6..5350a64bee 100644 --- a/scalar/mac.go +++ b/scalar/mac.go @@ -4,7 +4,7 @@ import ( "net" "github.com/apache/arrow/go/v13/arrow" - "github.com/cloudquery/plugin-sdk/v3/types" + "github.com/cloudquery/plugin-sdk/v4/types" ) type Mac struct { diff --git a/scalar/scalar.go b/scalar/scalar.go index 5f471e0258..d80c1a2e5e 100644 --- a/scalar/scalar.go +++ b/scalar/scalar.go @@ -5,9 +5,8 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/array" - "github.com/apache/arrow/go/v13/arrow/float16" - "github.com/cloudquery/plugin-sdk/v3/types" - "golang.org/x/exp/maps" + "github.com/apache/arrow/go/v13/arrow/memory" + "github.com/cloudquery/plugin-sdk/v4/types" ) // Scalar represents a single value of a specific DataType as opposed to @@ -33,7 +32,12 @@ type Scalar interface { type Vector []Scalar -const nullValueStr = array.NullValueStr +func (v Vector) ToArrowRecord(sc *arrow.Schema) arrow.Record { + bldr := array.NewRecordBuilder(memory.DefaultAllocator, sc) + AppendToRecordBuilder(bldr, v) + rec := bldr.NewRecord() + return rec +} func (v Vector) Equal(r Vector) bool { if len(v) != len(r) { diff --git a/scalar/uuid.go b/scalar/uuid.go index f8a79c94b0..dfae523cbd 100644 --- a/scalar/uuid.go +++ b/scalar/uuid.go @@ -5,7 +5,7 @@ import ( "fmt" "github.com/apache/arrow/go/v13/arrow" - "github.com/cloudquery/plugin-sdk/v3/types" + "github.com/cloudquery/plugin-sdk/v4/types" "github.com/google/uuid" ) diff --git a/schema/meta.go b/schema/meta.go index bd739bf80f..bd5ca2de7e 100644 --- a/schema/meta.go +++ b/schema/meta.go @@ -4,8 +4,8 @@ import ( "context" "github.com/apache/arrow/go/v13/arrow" - "github.com/cloudquery/plugin-sdk/v3/scalar" - "github.com/cloudquery/plugin-sdk/v3/types" + "github.com/cloudquery/plugin-sdk/v4/scalar" + "github.com/cloudquery/plugin-sdk/v4/types" ) type ClientMeta interface { diff --git a/schema/resource.go b/schema/resource.go index fbbaf6667b..e9d1f07da3 100644 --- a/schema/resource.go +++ b/schema/resource.go @@ -4,7 +4,7 @@ import ( "crypto/sha256" "fmt" - "github.com/cloudquery/plugin-sdk/v3/scalar" + "github.com/cloudquery/plugin-sdk/v4/scalar" "github.com/google/uuid" "golang.org/x/exp/slices" ) diff --git a/schema/table.go b/schema/table.go index ed774f3b39..4475170104 100644 --- a/schema/table.go +++ b/schema/table.go @@ -6,7 +6,7 @@ import ( "regexp" "github.com/apache/arrow/go/v13/arrow" - "github.com/cloudquery/plugin-sdk/v3/internal/glob" + "github.com/cloudquery/plugin-sdk/v4/internal/glob" "golang.org/x/exp/slices" ) diff --git a/schema/testdata.go b/schema/testdata.go index 5570c6a090..c592ddc40a 100644 --- a/schema/testdata.go +++ b/schema/testdata.go @@ -12,7 +12,7 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/array" "github.com/apache/arrow/go/v13/arrow/memory" - "github.com/cloudquery/plugin-sdk/v3/types" + "github.com/cloudquery/plugin-sdk/v4/types" "github.com/google/uuid" "golang.org/x/exp/rand" "golang.org/x/exp/slices" @@ -31,6 +31,7 @@ type TestSourceOptions struct { SkipTimes bool // time of day types SkipTimestamps bool // timestamp types. Microsecond timestamp is always be included, regardless of this setting. TimePrecision time.Duration + SkipDecimals bool } // TestSourceColumns returns columns for all Arrow types and composites thereof. TestSourceOptions controls diff --git a/serve/destination.go b/serve/destination.go deleted file mode 100644 index cba93b90a5..0000000000 --- a/serve/destination.go +++ /dev/null @@ -1,209 +0,0 @@ -package serve - -import ( - "fmt" - "net" - "os" - "os/signal" - "strings" - "sync" - "syscall" - - pbv0 "github.com/cloudquery/plugin-pb-go/pb/destination/v0" - pbv1 "github.com/cloudquery/plugin-pb-go/pb/destination/v1" - pbdiscoveryv0 "github.com/cloudquery/plugin-pb-go/pb/discovery/v0" - servers "github.com/cloudquery/plugin-sdk/v3/internal/servers/destination/v0" - serversv1 "github.com/cloudquery/plugin-sdk/v3/internal/servers/destination/v1" - discoveryServerV0 "github.com/cloudquery/plugin-sdk/v3/internal/servers/discovery/v0" - "github.com/cloudquery/plugin-sdk/v3/plugins/destination" - "github.com/cloudquery/plugin-sdk/v3/types" - "github.com/getsentry/sentry-go" - grpczerolog "github.com/grpc-ecosystem/go-grpc-middleware/providers/zerolog/v2" - "github.com/grpc-ecosystem/go-grpc-middleware/v2/interceptors/logging" - "github.com/rs/zerolog" - "github.com/rs/zerolog/log" - "github.com/spf13/cobra" - "github.com/thoas/go-funk" - "google.golang.org/grpc" - "google.golang.org/grpc/test/bufconn" -) - -type destinationServe struct { - plugin *destination.Plugin - sentryDSN string -} - -type DestinationOption func(*destinationServe) - -func WithDestinationSentryDSN(dsn string) DestinationOption { - return func(s *destinationServe) { - s.sentryDSN = dsn - } -} - -var testDestinationListener *bufconn.Listener -var testDestinationListenerLock sync.Mutex - -const serveDestinationShort = `Start destination plugin server` - -func Destination(plugin *destination.Plugin, opts ...DestinationOption) { - s := &destinationServe{ - plugin: plugin, - } - for _, opt := range opts { - opt(s) - } - if err := newCmdDestinationRoot(s).Execute(); err != nil { - sentry.CaptureMessage(err.Error()) - fmt.Println(err) - os.Exit(1) - } -} - -// nolint:dupl -func newCmdDestinationServe(serve *destinationServe) *cobra.Command { - var address string - var network string - var noSentry bool - logLevel := newEnum([]string{"trace", "debug", "info", "warn", "error"}, "info") - logFormat := newEnum([]string{"text", "json"}, "text") - telemetryLevel := newEnum([]string{"none", "errors", "stats", "all"}, "all") - err := telemetryLevel.Set(getEnvOrDefault("CQ_TELEMETRY_LEVEL", telemetryLevel.Value)) - if err != nil { - fmt.Fprintf(os.Stderr, "failed to set telemetry level: "+err.Error()) - os.Exit(1) - } - - cmd := &cobra.Command{ - Use: "serve", - Short: serveDestinationShort, - Long: serveDestinationShort, - Args: cobra.NoArgs, - RunE: func(cmd *cobra.Command, args []string) error { - zerologLevel, err := zerolog.ParseLevel(logLevel.String()) - if err != nil { - return err - } - var logger zerolog.Logger - if logFormat.String() == "json" { - logger = zerolog.New(os.Stdout).Level(zerologLevel) - } else { - logger = log.Output(zerolog.ConsoleWriter{Out: os.Stdout}).Level(zerologLevel) - } - - var listener net.Listener - if network == "test" { - testDestinationListenerLock.Lock() - listener = bufconn.Listen(testBufSize) - testDestinationListener = listener.(*bufconn.Listener) - testDestinationListenerLock.Unlock() - } else { - listener, err = net.Listen(network, address) - if err != nil { - return fmt.Errorf("failed to listen %s:%s: %w", network, address, err) - } - } - // See logging pattern https://github.com/grpc-ecosystem/go-grpc-middleware/blob/v2/providers/zerolog/examples_test.go - s := grpc.NewServer( - grpc.ChainUnaryInterceptor( - logging.UnaryServerInterceptor(grpczerolog.InterceptorLogger(logger)), - ), - grpc.ChainStreamInterceptor( - logging.StreamServerInterceptor(grpczerolog.InterceptorLogger(logger)), - ), - grpc.MaxRecvMsgSize(MaxMsgSize), - grpc.MaxSendMsgSize(MaxMsgSize), - ) - pbv0.RegisterDestinationServer(s, &servers.Server{ - Plugin: serve.plugin, - Logger: logger, - }) - pbv1.RegisterDestinationServer(s, &serversv1.Server{ - Plugin: serve.plugin, - Logger: logger, - }) - pbdiscoveryv0.RegisterDiscoveryServer(s, &discoveryServerV0.Server{ - Versions: []string{"v0", "v1"}, - }) - version := serve.plugin.Version() - - if serve.sentryDSN != "" && !strings.EqualFold(version, "development") && !noSentry { - err = sentry.Init(sentry.ClientOptions{ - Dsn: serve.sentryDSN, - Debug: false, - AttachStacktrace: false, - Release: version, - Transport: sentry.NewHTTPSyncTransport(), - ServerName: "oss", // set to "oss" on purpose to avoid sending any identifying information - // https://docs.sentry.io/platforms/go/configuration/options/#removing-default-integrations - Integrations: func(integrations []sentry.Integration) []sentry.Integration { - var filteredIntegrations []sentry.Integration - for _, integration := range integrations { - if integration.Name() == "Modules" { - continue - } - filteredIntegrations = append(filteredIntegrations, integration) - } - return filteredIntegrations - }, - }) - if err != nil { - log.Error().Err(err).Msg("Error initializing sentry") - } - } - - if err := types.RegisterAllExtensions(); err != nil { - return err - } - defer func() { - if err := types.UnregisterAllExtensions(); err != nil { - logger.Error().Err(err).Msg("Failed to unregister extensions") - } - }() - - ctx := cmd.Context() - c := make(chan os.Signal, 1) - signal.Notify(c, os.Interrupt, syscall.SIGTERM) - defer func() { - signal.Stop(c) - }() - - go func() { - select { - case sig := <-c: - logger.Info().Str("address", listener.Addr().String()).Str("signal", sig.String()).Msg("Got stop signal. Destination plugin server shutting down") - s.Stop() - case <-ctx.Done(): - logger.Info().Str("address", listener.Addr().String()).Msg("Context cancelled. Destination plugin server shutting down") - s.Stop() - } - }() - - logger.Info().Str("address", listener.Addr().String()).Msg("Destination plugin server listening") - if err := s.Serve(listener); err != nil { - return fmt.Errorf("failed to serve: %w", err) - } - return nil - }, - } - cmd.Flags().StringVar(&address, "address", "localhost:7777", "address to serve on. can be tcp: `localhost:7777` or unix socket: `/tmp/plugin.rpc.sock`") - cmd.Flags().StringVar(&network, "network", "tcp", `the network must be "tcp", "tcp4", "tcp6", "unix" or "unixpacket"`) - cmd.Flags().Var(logLevel, "log-level", fmt.Sprintf("log level. one of: %s", strings.Join(logLevel.Allowed, ","))) - cmd.Flags().Var(logFormat, "log-format", fmt.Sprintf("log format. one of: %s", strings.Join(logFormat.Allowed, ","))) - cmd.Flags().BoolVar(&noSentry, "no-sentry", false, "disable sentry") - sendErrors := funk.ContainsString([]string{"all", "errors"}, telemetryLevel.String()) - if !sendErrors { - noSentry = true - } - return cmd -} - -func newCmdDestinationRoot(serve *destinationServe) *cobra.Command { - cmd := &cobra.Command{ - Use: fmt.Sprintf("%s ", serve.plugin.Name()), - } - cmd.AddCommand(newCmdDestinationServe(serve)) - cmd.CompletionOptions.DisableDefaultCmd = true - cmd.Version = serve.plugin.Version() - return cmd -} diff --git a/serve/destination_v0_test.go b/serve/destination_v0_test.go index 84c4b0e272..e59bcbfd88 100644 --- a/serve/destination_v0_test.go +++ b/serve/destination_v0_test.go @@ -16,27 +16,27 @@ import ( "github.com/cloudquery/plugin-pb-go/specs" schemav2 "github.com/cloudquery/plugin-sdk/v2/schema" "github.com/cloudquery/plugin-sdk/v2/testdata" - "github.com/cloudquery/plugin-sdk/v3/internal/deprecated" - "github.com/cloudquery/plugin-sdk/v3/internal/memdb" - serversDestination "github.com/cloudquery/plugin-sdk/v3/internal/servers/destination/v0" - "github.com/cloudquery/plugin-sdk/v3/plugins/destination" + "github.com/cloudquery/plugin-sdk/v4/internal/deprecated" + "github.com/cloudquery/plugin-sdk/v4/internal/memdb" + serversDestination "github.com/cloudquery/plugin-sdk/v4/internal/servers/destination/v0" + "github.com/cloudquery/plugin-sdk/v4/plugin" "google.golang.org/grpc" "google.golang.org/grpc/credentials/insecure" "google.golang.org/protobuf/types/known/timestamppb" ) func bufDestinationDialer(context.Context, string) (net.Conn, error) { - testDestinationListenerLock.Lock() - defer testDestinationListenerLock.Unlock() - return testDestinationListener.Dial() + testPluginListenerLock.Lock() + defer testPluginListenerLock.Unlock() + return testPluginListener.Dial() } func TestDestination(t *testing.T) { - plugin := destination.NewPlugin("testDestinationPlugin", "development", memdb.NewClient) - s := &destinationServe{ + plugin := plugin.NewPlugin("testDestinationPlugin", "development", memdb.NewClient) + s := &pluginServe{ plugin: plugin, } - cmd := newCmdDestinationRoot(s) + cmd := newCmdPluginRoot(s) cmd.SetArgs([]string{"serve", "--network", "test"}) ctx := context.Background() ctx, cancel := context.WithCancel(ctx) @@ -54,12 +54,12 @@ func TestDestination(t *testing.T) { // wait for the server to start for { - testDestinationListenerLock.Lock() - if testDestinationListener != nil { - testDestinationListenerLock.Unlock() + testPluginListenerLock.Lock() + if testPluginListener != nil { + testPluginListenerLock.Unlock() break } - testDestinationListenerLock.Unlock() + testPluginListenerLock.Unlock() t.Log("waiting for grpc server to start") time.Sleep(time.Millisecond * 200) } @@ -163,8 +163,8 @@ func TestDestination(t *testing.T) { for resource := range readCh { totalResources++ if !array.RecordEqual(destRecord, resource) { - diff := destination.RecordDiff(destRecord, resource) - t.Fatalf("expected %v but got %v. Diff: %v", destRecord, resource, diff) + // diff := destination.RecordDiff(destRecord, resource) + t.Fatalf("expected %v but got %v", destRecord, resource) } } if totalResources != 1 { diff --git a/serve/destination_v1_test.go b/serve/destination_v1_test.go index e5172106ad..0f55b90694 100644 --- a/serve/destination_v1_test.go +++ b/serve/destination_v1_test.go @@ -13,20 +13,21 @@ import ( "github.com/apache/arrow/go/v13/arrow/ipc" pb "github.com/cloudquery/plugin-pb-go/pb/destination/v1" "github.com/cloudquery/plugin-pb-go/specs" - "github.com/cloudquery/plugin-sdk/v3/internal/memdb" - "github.com/cloudquery/plugin-sdk/v3/plugins/destination" - "github.com/cloudquery/plugin-sdk/v3/schema" + "github.com/cloudquery/plugin-sdk/v4/internal/memdb" + "github.com/cloudquery/plugin-sdk/v4/plugin" + "github.com/cloudquery/plugin-sdk/v4/plugins/destination" + "github.com/cloudquery/plugin-sdk/v4/schema" "google.golang.org/grpc" "google.golang.org/grpc/credentials/insecure" "google.golang.org/protobuf/types/known/timestamppb" ) func TestDestinationV1(t *testing.T) { - plugin := destination.NewPlugin("testDestinationPlugin", "development", memdb.NewClient) - s := &destinationServe{ + plugin := plugin.NewPlugin("testDestinationPlugin", "development", memdb.NewClient) + s := &pluginServe{ plugin: plugin, } - cmd := newCmdDestinationRoot(s) + cmd := newCmdPluginRoot(s) cmd.SetArgs([]string{"serve", "--network", "test"}) ctx := context.Background() ctx, cancel := context.WithCancel(ctx) @@ -44,12 +45,12 @@ func TestDestinationV1(t *testing.T) { // wait for the server to start for { - testDestinationListenerLock.Lock() - if testDestinationListener != nil { - testDestinationListenerLock.Unlock() + testPluginListenerLock.Lock() + if testPluginListener != nil { + testPluginListenerLock.Unlock() break } - testDestinationListenerLock.Unlock() + testPluginListenerLock.Unlock() t.Log("waiting for grpc server to start") time.Sleep(time.Millisecond * 200) } diff --git a/serve/plugin.go b/serve/plugin.go index b37be8513c..2fe9774262 100644 --- a/serve/plugin.go +++ b/serve/plugin.go @@ -9,13 +9,17 @@ import ( "sync" "syscall" - "github.com/cloudquery/plugin-sdk/v3/plugin" + "github.com/cloudquery/plugin-sdk/v4/plugin" + pbDestinationV0 "github.com/cloudquery/plugin-pb-go/pb/destination/v0" + pbDestinationV1 "github.com/cloudquery/plugin-pb-go/pb/destination/v1" pbdiscoveryv0 "github.com/cloudquery/plugin-pb-go/pb/discovery/v0" - pbv0 "github.com/cloudquery/plugin-pb-go/pb/plugin/v0" - discoveryServerV0 "github.com/cloudquery/plugin-sdk/v3/internal/servers/discovery/v0" + pbv3 "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" + discoveryServerV0 "github.com/cloudquery/plugin-sdk/v4/internal/servers/discovery/v0" - serversv0 "github.com/cloudquery/plugin-sdk/v3/internal/servers/plugin/v0" + serverDestinationV0 "github.com/cloudquery/plugin-sdk/v4/internal/servers/destination/v0" + serverDestinationV1 "github.com/cloudquery/plugin-sdk/v4/internal/servers/destination/v1" + serversv3 "github.com/cloudquery/plugin-sdk/v4/internal/servers/plugin/v3" "github.com/getsentry/sentry-go" grpczerolog "github.com/grpc-ecosystem/go-grpc-middleware/providers/zerolog/v2" "github.com/grpc-ecosystem/go-grpc-middleware/v2/interceptors/logging" @@ -30,6 +34,7 @@ import ( type pluginServe struct { plugin *plugin.Plugin + destinationV0V1Server bool sentryDSN string } @@ -41,6 +46,14 @@ func WithPluginSentryDSN(dsn string) PluginOption { } } +// WithDestinationV0V1Server is used to include destination v0 and v1 server to work +// with older sources +func WithDestinationV0V1Server() PluginOption { + return func(s *pluginServe) { + s.destinationV0V1Server = true + } +} + // lis used for unit testing grpc server and client var testPluginListener *bufconn.Listener var testPluginListenerLock sync.Mutex @@ -77,8 +90,8 @@ func newCmdPluginServe(serve *pluginServe) *cobra.Command { cmd := &cobra.Command{ Use: "serve", - Short: serveSourceShort, - Long: serveSourceShort, + Short: servePluginShort, + Long: servePluginShort, Args: cobra.NoArgs, RunE: func(cmd *cobra.Command, args []string) error { zerologLevel, err := zerolog.ParseLevel(logLevel.String()) @@ -95,10 +108,10 @@ func newCmdPluginServe(serve *pluginServe) *cobra.Command { // opts.Plugin.Logger = logger var listener net.Listener if network == "test" { - testSourceListenerLock.Lock() + testPluginListenerLock.Lock() listener = bufconn.Listen(testBufSize) - testSourceListener = listener.(*bufconn.Listener) - testSourceListenerLock.Unlock() + testPluginListener = listener.(*bufconn.Listener) + testPluginListenerLock.Unlock() } else { listener, err = net.Listen(network, address) if err != nil { @@ -120,12 +133,22 @@ func newCmdPluginServe(serve *pluginServe) *cobra.Command { grpc.MaxSendMsgSize(MaxMsgSize), ) serve.plugin.SetLogger(logger) - pbv0.RegisterPluginServer(s, &serversv0.Server{ + pbv3.RegisterPluginServer(s, &serversv3.Server{ Plugin: serve.plugin, Logger: logger, }) + if serve.destinationV0V1Server { + pbDestinationV1.RegisterDestinationServer(s, &serverDestinationV1.Server{ + Plugin: serve.plugin, + Logger: logger, + }) + pbDestinationV0.RegisterDestinationServer(s, &serverDestinationV0.Server{ + Plugin: serve.plugin, + Logger: logger, + }) + } pbdiscoveryv0.RegisterDiscoveryServer(s, &discoveryServerV0.Server{ - Versions: []string{"v2"}, + Versions: []string{"v0", "v1", "v2", "v3"}, }) version := serve.plugin.Version() @@ -211,11 +234,11 @@ func newCmdPluginDoc(serve *pluginServe) *cobra.Command { format := newEnum([]string{"json", "markdown"}, "markdown") cmd := &cobra.Command{ Use: "doc ", - Short: sourceDocShort, - Long: sourceDocLong, + Short: pluginDocShort, + Long: pluginDocLong, Args: cobra.ExactArgs(1), RunE: func(cmd *cobra.Command, args []string) error { - pbFormat := pbv0.GenDocs_FORMAT(pbv0.GenDocs_FORMAT_value[format.Value]) + pbFormat := pbv3.GenDocs_FORMAT(pbv3.GenDocs_FORMAT_value[format.Value]) return serve.plugin.GeneratePluginDocs(serve.plugin.StaticTables(), args[0], pbFormat) }, } diff --git a/serve/plugin_test.go b/serve/plugin_test.go index 8a541611e9..9aeb864a4c 100644 --- a/serve/plugin_test.go +++ b/serve/plugin_test.go @@ -12,10 +12,10 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/ipc" - pb "github.com/cloudquery/plugin-pb-go/pb/source/v2" - "github.com/cloudquery/plugin-pb-go/specs" - "github.com/cloudquery/plugin-sdk/v3/plugins/source" - "github.com/cloudquery/plugin-sdk/v3/schema" + pb "github.com/cloudquery/plugin-pb-go/pb/plugin/v0" + "github.com/cloudquery/plugin-sdk/v4/plugin" + "github.com/cloudquery/plugin-sdk/v4/plugins/source" + "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/rs/zerolog" "google.golang.org/grpc" "google.golang.org/grpc/credentials/insecure" @@ -25,7 +25,11 @@ type TestSourcePluginSpec struct { Accounts []string `json:"accounts,omitempty" yaml:"accounts,omitempty"` } -type testExecutionClient struct{} +type testExecutionClient struct { + plugin.UnimplementedSync + plugin.UnimplementedWriter + plugin.UnimplementedRead +} var _ schema.ClientMeta = &testExecutionClient{} @@ -53,24 +57,28 @@ func (*testExecutionClient) ID() string { return "testExecutionClient" } -func newTestExecutionClient(context.Context, zerolog.Logger, specs.Source, source.Options) (schema.ClientMeta, error) { +func (*testExecutionClient) Close(ctx context.Context) error { + return nil +} + +func newTestExecutionClient(context.Context, zerolog.Logger, pb.Spec) (plugin.Client, error) { return &testExecutionClient{}, nil } func bufSourceDialer(context.Context, string) (net.Conn, error) { - testSourceListenerLock.Lock() - defer testSourceListenerLock.Unlock() - return testSourceListener.Dial() + testPluginListenerLock.Lock() + defer testPluginListenerLock.Unlock() + return testPluginListener.Dial() } func TestSourceSuccess(t *testing.T) { - plugin := source.NewPlugin( + plugin := plugin.NewPlugin( "testPlugin", "v1.0.0", - []*schema.Table{testTable("test_table"), testTable("test_table2")}, - newTestExecutionClient) + newTestExecutionClient, + plugin.WithStaticTables([]*schema.Table{testTable("test_table"), testTable("test_table2")})) - cmd := newCmdSourceRoot(&sourceServe{ + cmd := newCmdPluginRoot(&pluginServe{ plugin: plugin, }) cmd.SetArgs([]string{"serve", "--network", "test"}) @@ -88,12 +96,12 @@ func TestSourceSuccess(t *testing.T) { wg.Wait() }() for { - testSourceListenerLock.Lock() - if testSourceListener != nil { - testSourceListenerLock.Unlock() + testPluginListenerLock.Lock() + if testPluginListener != nil { + testPluginListenerLock.Unlock() break } - testSourceListenerLock.Unlock() + testPluginListenerLock.Unlock() t.Log("waiting for grpc server to start") time.Sleep(time.Millisecond * 200) } @@ -103,7 +111,7 @@ func TestSourceSuccess(t *testing.T) { if err != nil { t.Fatalf("Failed to dial bufnet: %v", err) } - c := pb.NewSourceClient(conn) + c := pb.NewPluginClient(conn) getNameRes, err := c.GetName(ctx, &pb.GetName_Request{}) if err != nil { @@ -121,21 +129,17 @@ func TestSourceSuccess(t *testing.T) { t.Fatalf("Expected version to be v1.0.0 but got %s", getVersionResponse.Version) } - spec := specs.Source{ - Name: "testSourcePlugin", - Version: "v1.0.0", - Path: "cloudquery/testSourcePlugin", - Registry: specs.RegistryGithub, - Tables: []string{"test_table"}, - Spec: TestSourcePluginSpec{Accounts: []string{"cloudquery/plugin-sdk"}}, - Destinations: []string{"test"}, - } - specMarshaled, err := json.Marshal(spec) - if err != nil { - t.Fatalf("Failed to marshal spec: %v", err) + spec := pb.Spec{ + Name: "testSourcePlugin", + Version: "v1.0.0", + Path: "cloudquery/testSourcePlugin", + SyncSpec: &pb.SyncSpec{ + Tables: []string{"test_table"}, + Destinations: []string{"test"}, + }, } - getTablesRes, err := c.GetTables(ctx, &pb.GetTables_Request{}) + getTablesRes, err := c.GetStaticTables(ctx, &pb.GetStaticTables_Request{}) if err != nil { t.Fatal(err) } @@ -148,7 +152,7 @@ func TestSourceSuccess(t *testing.T) { if len(tables) != 2 { t.Fatalf("Expected 2 tables but got %d", len(tables)) } - if _, err := c.Init(ctx, &pb.Init_Request{Spec: specMarshaled}); err != nil { + if _, err := c.Init(ctx, &pb.Init_Request{Spec: &spec}); err != nil { t.Fatal(err) } diff --git a/serve/source.go b/serve/source.go deleted file mode 100644 index ae57c83d07..0000000000 --- a/serve/source.go +++ /dev/null @@ -1,233 +0,0 @@ -package serve - -import ( - "fmt" - "net" - "os" - "os/signal" - "strings" - "sync" - "syscall" - - pbdiscoveryv0 "github.com/cloudquery/plugin-pb-go/pb/discovery/v0" - pbv2 "github.com/cloudquery/plugin-pb-go/pb/source/v2" - discoveryServerV0 "github.com/cloudquery/plugin-sdk/v3/internal/servers/discovery/v0" - - serversv2 "github.com/cloudquery/plugin-sdk/v3/internal/servers/source/v2" - "github.com/cloudquery/plugin-sdk/v3/plugins/source" - "github.com/getsentry/sentry-go" - grpczerolog "github.com/grpc-ecosystem/go-grpc-middleware/providers/zerolog/v2" - "github.com/grpc-ecosystem/go-grpc-middleware/v2/interceptors/logging" - "github.com/rs/zerolog" - "github.com/rs/zerolog/log" - "github.com/spf13/cobra" - "github.com/thoas/go-funk" - "golang.org/x/net/netutil" - "google.golang.org/grpc" - "google.golang.org/grpc/test/bufconn" -) - -type sourceServe struct { - plugin *source.Plugin - sentryDSN string -} - -type SourceOption func(*sourceServe) - -func WithSourceSentryDSN(dsn string) SourceOption { - return func(s *sourceServe) { - s.sentryDSN = dsn - } -} - -// lis used for unit testing grpc server and client -var testSourceListener *bufconn.Listener -var testSourceListenerLock sync.Mutex - -const serveSourceShort = `Start source plugin server` - -func Source(plugin *source.Plugin, opts ...SourceOption) { - s := &sourceServe{ - plugin: plugin, - } - for _, opt := range opts { - opt(s) - } - if err := newCmdSourceRoot(s).Execute(); err != nil { - sentry.CaptureMessage(err.Error()) - fmt.Println(err) - os.Exit(1) - } -} - -// nolint:dupl -func newCmdSourceServe(serve *sourceServe) *cobra.Command { - var address string - var network string - var noSentry bool - logLevel := newEnum([]string{"trace", "debug", "info", "warn", "error"}, "info") - logFormat := newEnum([]string{"text", "json"}, "text") - telemetryLevel := newEnum([]string{"none", "errors", "stats", "all"}, "all") - err := telemetryLevel.Set(getEnvOrDefault("CQ_TELEMETRY_LEVEL", telemetryLevel.Value)) - if err != nil { - fmt.Fprintf(os.Stderr, "failed to set telemetry level: "+err.Error()) - os.Exit(1) - } - - cmd := &cobra.Command{ - Use: "serve", - Short: serveSourceShort, - Long: serveSourceShort, - Args: cobra.NoArgs, - RunE: func(cmd *cobra.Command, args []string) error { - zerologLevel, err := zerolog.ParseLevel(logLevel.String()) - if err != nil { - return err - } - var logger zerolog.Logger - if logFormat.String() == "json" { - logger = zerolog.New(os.Stdout).Level(zerologLevel) - } else { - logger = log.Output(zerolog.ConsoleWriter{Out: os.Stdout}).Level(zerologLevel) - } - - // opts.Plugin.Logger = logger - var listener net.Listener - if network == "test" { - testSourceListenerLock.Lock() - listener = bufconn.Listen(testBufSize) - testSourceListener = listener.(*bufconn.Listener) - testSourceListenerLock.Unlock() - } else { - listener, err = net.Listen(network, address) - if err != nil { - return fmt.Errorf("failed to listen %s:%s: %w", network, address, err) - } - } - // source plugins can only accept one connection at a time - // unlike destination plugins that can accept multiple connections - limitListener := netutil.LimitListener(listener, 1) - // See logging pattern https://github.com/grpc-ecosystem/go-grpc-middleware/blob/v2/providers/zerolog/examples_test.go - s := grpc.NewServer( - grpc.ChainUnaryInterceptor( - logging.UnaryServerInterceptor(grpczerolog.InterceptorLogger(logger)), - ), - grpc.ChainStreamInterceptor( - logging.StreamServerInterceptor(grpczerolog.InterceptorLogger(logger)), - ), - grpc.MaxRecvMsgSize(MaxMsgSize), - grpc.MaxSendMsgSize(MaxMsgSize), - ) - serve.plugin.SetLogger(logger) - pbv2.RegisterSourceServer(s, &serversv2.Server{ - Plugin: serve.plugin, - Logger: logger, - }) - pbdiscoveryv0.RegisterDiscoveryServer(s, &discoveryServerV0.Server{ - Versions: []string{"v2"}, - }) - - version := serve.plugin.Version() - - if serve.sentryDSN != "" && !strings.EqualFold(version, "development") && !noSentry { - err = sentry.Init(sentry.ClientOptions{ - Dsn: serve.sentryDSN, - Debug: false, - AttachStacktrace: false, - Release: version, - Transport: sentry.NewHTTPSyncTransport(), - ServerName: "oss", // set to "oss" on purpose to avoid sending any identifying information - // https://docs.sentry.io/platforms/go/configuration/options/#removing-default-integrations - Integrations: func(integrations []sentry.Integration) []sentry.Integration { - var filteredIntegrations []sentry.Integration - for _, integration := range integrations { - if integration.Name() == "Modules" { - continue - } - filteredIntegrations = append(filteredIntegrations, integration) - } - return filteredIntegrations - }, - }) - if err != nil { - log.Error().Err(err).Msg("Error initializing sentry") - } - } - - ctx := cmd.Context() - c := make(chan os.Signal, 1) - signal.Notify(c, os.Interrupt, syscall.SIGTERM) - defer func() { - signal.Stop(c) - }() - - go func() { - select { - case sig := <-c: - logger.Info().Str("address", listener.Addr().String()).Str("signal", sig.String()).Msg("Got stop signal. Source plugin server shutting down") - s.Stop() - case <-ctx.Done(): - logger.Info().Str("address", listener.Addr().String()).Msg("Context cancelled. Source plugin server shutting down") - s.Stop() - } - }() - - logger.Info().Str("address", listener.Addr().String()).Msg("Source plugin server listening") - if err := s.Serve(limitListener); err != nil { - return fmt.Errorf("failed to serve: %w", err) - } - return nil - }, - } - cmd.Flags().StringVar(&address, "address", "localhost:7777", "address to serve on. can be tcp: `localhost:7777` or unix socket: `/tmp/plugin.rpc.sock`") - cmd.Flags().StringVar(&network, "network", "tcp", `the network must be "tcp", "tcp4", "tcp6", "unix" or "unixpacket"`) - cmd.Flags().Var(logLevel, "log-level", fmt.Sprintf("log level. one of: %s", strings.Join(logLevel.Allowed, ","))) - cmd.Flags().Var(logFormat, "log-format", fmt.Sprintf("log format. one of: %s", strings.Join(logFormat.Allowed, ","))) - cmd.Flags().BoolVar(&noSentry, "no-sentry", false, "disable sentry") - sendErrors := funk.ContainsString([]string{"all", "errors"}, telemetryLevel.String()) - if !sendErrors { - noSentry = true - } - - return cmd -} - -const ( - sourceDocShort = "Generate documentation for tables" - sourceDocLong = `Generate documentation for tables - -If format is markdown, a destination directory will be created (if necessary) containing markdown files. -Example: -doc ./output - -If format is JSON, a destination directory will be created (if necessary) with a single json file called __tables.json. -Example: -doc --format json . -` -) - -func newCmdSourceDoc(serve *sourceServe) *cobra.Command { - format := newEnum([]string{"json", "markdown"}, "markdown") - cmd := &cobra.Command{ - Use: "doc ", - Short: sourceDocShort, - Long: sourceDocLong, - Args: cobra.ExactArgs(1), - RunE: func(cmd *cobra.Command, args []string) error { - return serve.plugin.GeneratePluginDocs(args[0], format.Value) - }, - } - cmd.Flags().Var(format, "format", fmt.Sprintf("output format. one of: %s", strings.Join(format.Allowed, ","))) - return cmd -} - -func newCmdSourceRoot(serve *sourceServe) *cobra.Command { - cmd := &cobra.Command{ - Use: fmt.Sprintf("%s ", serve.plugin.Name()), - } - cmd.AddCommand(newCmdSourceServe(serve)) - cmd.AddCommand(newCmdSourceDoc(serve)) - cmd.CompletionOptions.DisableDefaultCmd = true - cmd.Version = serve.plugin.Version() - return cmd -} diff --git a/serve/source_v2_test.go b/serve/source_v2_test.go deleted file mode 100644 index 8a541611e9..0000000000 --- a/serve/source_v2_test.go +++ /dev/null @@ -1,238 +0,0 @@ -package serve - -import ( - "bytes" - "context" - "encoding/json" - "io" - "net" - "sync" - "testing" - "time" - - "github.com/apache/arrow/go/v13/arrow" - "github.com/apache/arrow/go/v13/arrow/ipc" - pb "github.com/cloudquery/plugin-pb-go/pb/source/v2" - "github.com/cloudquery/plugin-pb-go/specs" - "github.com/cloudquery/plugin-sdk/v3/plugins/source" - "github.com/cloudquery/plugin-sdk/v3/schema" - "github.com/rs/zerolog" - "google.golang.org/grpc" - "google.golang.org/grpc/credentials/insecure" -) - -type TestSourcePluginSpec struct { - Accounts []string `json:"accounts,omitempty" yaml:"accounts,omitempty"` -} - -type testExecutionClient struct{} - -var _ schema.ClientMeta = &testExecutionClient{} - -// var errTestExecutionClientErr = fmt.Errorf("error in newTestExecutionClientErr") - -func testTable(name string) *schema.Table { - return &schema.Table{ - Name: name, - Resolver: func(ctx context.Context, meta schema.ClientMeta, parent *schema.Resource, res chan<- any) error { - res <- map[string]any{ - "TestColumn": 3, - } - return nil - }, - Columns: []schema.Column{ - { - Name: "test_column", - Type: arrow.PrimitiveTypes.Int64, - }, - }, - } -} - -func (*testExecutionClient) ID() string { - return "testExecutionClient" -} - -func newTestExecutionClient(context.Context, zerolog.Logger, specs.Source, source.Options) (schema.ClientMeta, error) { - return &testExecutionClient{}, nil -} - -func bufSourceDialer(context.Context, string) (net.Conn, error) { - testSourceListenerLock.Lock() - defer testSourceListenerLock.Unlock() - return testSourceListener.Dial() -} - -func TestSourceSuccess(t *testing.T) { - plugin := source.NewPlugin( - "testPlugin", - "v1.0.0", - []*schema.Table{testTable("test_table"), testTable("test_table2")}, - newTestExecutionClient) - - cmd := newCmdSourceRoot(&sourceServe{ - plugin: plugin, - }) - cmd.SetArgs([]string{"serve", "--network", "test"}) - ctx := context.Background() - ctx, cancel := context.WithCancel(ctx) - var wg sync.WaitGroup - wg.Add(1) - var serverErr error - go func() { - defer wg.Done() - serverErr = cmd.ExecuteContext(ctx) - }() - defer func() { - cancel() - wg.Wait() - }() - for { - testSourceListenerLock.Lock() - if testSourceListener != nil { - testSourceListenerLock.Unlock() - break - } - testSourceListenerLock.Unlock() - t.Log("waiting for grpc server to start") - time.Sleep(time.Millisecond * 200) - } - - // https://stackoverflow.com/questions/42102496/testing-a-grpc-service - conn, err := grpc.DialContext(ctx, "bufnet", grpc.WithContextDialer(bufSourceDialer), grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithBlock()) - if err != nil { - t.Fatalf("Failed to dial bufnet: %v", err) - } - c := pb.NewSourceClient(conn) - - getNameRes, err := c.GetName(ctx, &pb.GetName_Request{}) - if err != nil { - t.Fatal(err) - } - if getNameRes.Name != "testPlugin" { - t.Fatalf("expected name to be testPlugin but got %s", getNameRes.Name) - } - - getVersionResponse, err := c.GetVersion(ctx, &pb.GetVersion_Request{}) - if err != nil { - t.Fatal(err) - } - if getVersionResponse.Version != "v1.0.0" { - t.Fatalf("Expected version to be v1.0.0 but got %s", getVersionResponse.Version) - } - - spec := specs.Source{ - Name: "testSourcePlugin", - Version: "v1.0.0", - Path: "cloudquery/testSourcePlugin", - Registry: specs.RegistryGithub, - Tables: []string{"test_table"}, - Spec: TestSourcePluginSpec{Accounts: []string{"cloudquery/plugin-sdk"}}, - Destinations: []string{"test"}, - } - specMarshaled, err := json.Marshal(spec) - if err != nil { - t.Fatalf("Failed to marshal spec: %v", err) - } - - getTablesRes, err := c.GetTables(ctx, &pb.GetTables_Request{}) - if err != nil { - t.Fatal(err) - } - - tables, err := schema.NewTablesFromBytes(getTablesRes.Tables) - if err != nil { - t.Fatal(err) - } - - if len(tables) != 2 { - t.Fatalf("Expected 2 tables but got %d", len(tables)) - } - if _, err := c.Init(ctx, &pb.Init_Request{Spec: specMarshaled}); err != nil { - t.Fatal(err) - } - - getTablesForSpecRes, err := c.GetDynamicTables(ctx, &pb.GetDynamicTables_Request{}) - if err != nil { - t.Fatal(err) - } - tables, err = schema.NewTablesFromBytes(getTablesForSpecRes.Tables) - if err != nil { - t.Fatal(err) - } - - if len(tables) != 1 { - t.Fatalf("Expected 1 table but got %d", len(tables)) - } - - syncClient, err := c.Sync(ctx, &pb.Sync_Request{}) - if err != nil { - t.Fatal(err) - } - var resources []arrow.Record - for { - r, err := syncClient.Recv() - if err == io.EOF { - break - } - if err != nil { - t.Fatal(err) - } - rdr, err := ipc.NewReader(bytes.NewReader(r.Resource)) - if err != nil { - t.Fatal(err) - } - for rdr.Next() { - rec := rdr.Record() - rec.Retain() - resources = append(resources, rec) - } - } - - totalResources := 0 - for _, resource := range resources { - sc := resource.Schema() - tableName, ok := sc.Metadata().GetValue(schema.MetadataTableName) - if !ok { - t.Fatal("Expected table name metadata to be set") - } - if tableName != "test_table" { - t.Fatalf("Expected resource with table name test_table. got: %s", tableName) - } - if len(resource.Columns()) != 5 { - t.Fatalf("Expected resource with data length 3 but got %d", len(resource.Columns())) - } - totalResources++ - } - if totalResources != 1 { - t.Fatalf("Expected 1 resource on channel but got %d", totalResources) - } - - getMetricsRes, err := c.GetMetrics(ctx, &pb.GetMetrics_Request{}) - if err != nil { - t.Fatal(err) - } - var stats source.Metrics - if err := json.Unmarshal(getMetricsRes.Metrics, &stats); err != nil { - t.Fatal(err) - } - - clientStats := stats.TableClient[""][""] - if clientStats.Resources != 1 { - t.Fatalf("Expected 1 resource but got %d", clientStats.Resources) - } - - if clientStats.Errors != 0 { - t.Fatalf("Expected 0 errors but got %d", clientStats.Errors) - } - - if clientStats.Panics != 0 { - t.Fatalf("Expected 0 panics but got %d", clientStats.Panics) - } - - cancel() - wg.Wait() - if serverErr != nil { - t.Fatal(serverErr) - } -} diff --git a/transformers/struct.go b/transformers/struct.go index 2296af865e..b6c97842c5 100644 --- a/transformers/struct.go +++ b/transformers/struct.go @@ -8,9 +8,9 @@ import ( "time" "github.com/apache/arrow/go/v13/arrow" - "github.com/cloudquery/plugin-sdk/v3/caser" - "github.com/cloudquery/plugin-sdk/v3/schema" - "github.com/cloudquery/plugin-sdk/v3/types" + "github.com/cloudquery/plugin-sdk/v4/caser" + "github.com/cloudquery/plugin-sdk/v4/schema" + "github.com/cloudquery/plugin-sdk/v4/types" "github.com/thoas/go-funk" "golang.org/x/exp/slices" ) diff --git a/transformers/struct_test.go b/transformers/struct_test.go index 55acfbef16..d59cc6588b 100644 --- a/transformers/struct_test.go +++ b/transformers/struct_test.go @@ -7,8 +7,8 @@ import ( "time" "github.com/apache/arrow/go/v13/arrow" - "github.com/cloudquery/plugin-sdk/v3/schema" - "github.com/cloudquery/plugin-sdk/v3/types" + "github.com/cloudquery/plugin-sdk/v4/schema" + "github.com/cloudquery/plugin-sdk/v4/types" "github.com/google/go-cmp/cmp" "golang.org/x/exp/slices" ) From eea010c59ada83521a324d2e3752f63e216f3155 Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Tue, 30 May 2023 22:05:10 +0300 Subject: [PATCH 003/125] more wip --- internal/servers/destination/v0/specv3tov1.go | 44 ++--- internal/servers/destination/v1/specv3tov1.go | 44 ++--- plugin/managed_writer.go | 2 +- {internal/memdb => plugin}/memdb.go | 35 ++-- {internal/memdb => plugin}/memdb_test.go | 77 ++++----- plugin/nulls.go | 6 +- plugin/options.go | 3 +- plugin/plugin.go | 10 +- plugin/plugin_managed_source_test.go | 2 +- plugin/plugin_round_robin_test.go | 158 +----------------- plugin/testing_overwrite_deletestale.go | 9 +- plugin/testing_write.go | 6 +- plugin/testing_write_append.go | 2 +- plugin/testing_write_migrate.go | 2 +- plugin/testing_write_overwrite.go | 2 +- serve/plugin.go | 4 +- 16 files changed, 124 insertions(+), 282 deletions(-) rename {internal/memdb => plugin}/memdb.go (84%) rename {internal/memdb => plugin}/memdb_test.go (71%) diff --git a/internal/servers/destination/v0/specv3tov1.go b/internal/servers/destination/v0/specv3tov1.go index 31ab4fb5de..1e7146e507 100644 --- a/internal/servers/destination/v0/specv3tov1.go +++ b/internal/servers/destination/v0/specv3tov1.go @@ -7,22 +7,22 @@ import ( func SourceSpecV1ToV3(spec specs.Source) pbPlugin.Spec { newSpec := pbPlugin.Spec{ - Name: spec.Name, + Name: spec.Name, Version: spec.Version, - Path: spec.Path, + Path: spec.Path, SyncSpec: &pbPlugin.SyncSpec{ - Tables: spec.Tables, - SkipTables: spec.SkipTables, - Destinations: spec.Destinations, - Concurrency: uint64(spec.Concurrency), + Tables: spec.Tables, + SkipTables: spec.SkipTables, + Destinations: spec.Destinations, + Concurrency: uint64(spec.Concurrency), DetrministicCqId: spec.DeterministicCQID, }, } switch spec.Scheduler { case specs.SchedulerDFS: - newSpec.SyncSpec.Scheduler = pbPlugin.SyncSpec_SCHEDULER_DFS + newSpec.SyncSpec.Scheduler = pbPlugin.SyncSpec_SCHEDULER_DFS case specs.SchedulerRoundRobin: - newSpec.SyncSpec.Scheduler = pbPlugin.SyncSpec_SCHEDULER_ROUND_ROBIN + newSpec.SyncSpec.Scheduler = pbPlugin.SyncSpec_SCHEDULER_ROUND_ROBIN default: panic("invalid scheduler " + spec.Scheduler.String()) } @@ -31,47 +31,47 @@ func SourceSpecV1ToV3(spec specs.Source) pbPlugin.Spec { func SpecV1ToV3(spec specs.Destination) pbPlugin.Spec { newSpec := pbPlugin.Spec{ - Name: spec.Name, + Name: spec.Name, Version: spec.Version, - Path: spec.Path, + Path: spec.Path, WriteSpec: &pbPlugin.WriteSpec{ - BatchSize: uint64(spec.BatchSize), + BatchSize: uint64(spec.BatchSize), BatchSizeBytes: uint64(spec.BatchSizeBytes), }, } switch spec.Registry { case specs.RegistryGithub: - newSpec.Registry = pbPlugin.Spec_REGISTRY_GITHUB + newSpec.Registry = pbPlugin.Spec_REGISTRY_GITHUB case specs.RegistryGrpc: - newSpec.Registry = pbPlugin.Spec_REGISTRY_GRPC + newSpec.Registry = pbPlugin.Spec_REGISTRY_GRPC case specs.RegistryLocal: - newSpec.Registry = pbPlugin.Spec_REGISTRY_LOCAL + newSpec.Registry = pbPlugin.Spec_REGISTRY_LOCAL default: panic("invalid registry " + spec.Registry.String()) } switch spec.WriteMode { case specs.WriteModeAppend: - newSpec.WriteSpec.WriteMode = pbPlugin.WRITE_MODE_WRITE_MODE_APPEND + newSpec.WriteSpec.WriteMode = pbPlugin.WRITE_MODE_WRITE_MODE_APPEND case specs.WriteModeOverwrite: - newSpec.WriteSpec.WriteMode = pbPlugin.WRITE_MODE_WRITE_MODE_OVERWRITE + newSpec.WriteSpec.WriteMode = pbPlugin.WRITE_MODE_WRITE_MODE_OVERWRITE case specs.WriteModeOverwriteDeleteStale: - newSpec.WriteSpec.WriteMode = pbPlugin.WRITE_MODE_WRITE_MODE_OVERWRITE_DELETE_STALE + newSpec.WriteSpec.WriteMode = pbPlugin.WRITE_MODE_WRITE_MODE_OVERWRITE_DELETE_STALE default: panic("invalid write mode " + spec.WriteMode.String()) } switch spec.PKMode { case specs.PKModeDefaultKeys: - newSpec.WriteSpec.PkMode = pbPlugin.WriteSpec_DEFAULT + newSpec.WriteSpec.PkMode = pbPlugin.WriteSpec_DEFAULT case specs.PKModeCQID: - newSpec.WriteSpec.PkMode = pbPlugin.WriteSpec_CQ_ID_ONLY + newSpec.WriteSpec.PkMode = pbPlugin.WriteSpec_CQ_ID_ONLY } switch spec.MigrateMode { case specs.MigrateModeSafe: - newSpec.WriteSpec.MigrateMode = pbPlugin.WriteSpec_SAFE + newSpec.WriteSpec.MigrateMode = pbPlugin.WriteSpec_SAFE case specs.MigrateModeForced: - newSpec.WriteSpec.MigrateMode = pbPlugin.WriteSpec_FORCE + newSpec.WriteSpec.MigrateMode = pbPlugin.WriteSpec_FORCE default: panic("invalid migrate mode " + spec.MigrateMode.String()) } return newSpec -} \ No newline at end of file +} diff --git a/internal/servers/destination/v1/specv3tov1.go b/internal/servers/destination/v1/specv3tov1.go index 31ab4fb5de..1e7146e507 100644 --- a/internal/servers/destination/v1/specv3tov1.go +++ b/internal/servers/destination/v1/specv3tov1.go @@ -7,22 +7,22 @@ import ( func SourceSpecV1ToV3(spec specs.Source) pbPlugin.Spec { newSpec := pbPlugin.Spec{ - Name: spec.Name, + Name: spec.Name, Version: spec.Version, - Path: spec.Path, + Path: spec.Path, SyncSpec: &pbPlugin.SyncSpec{ - Tables: spec.Tables, - SkipTables: spec.SkipTables, - Destinations: spec.Destinations, - Concurrency: uint64(spec.Concurrency), + Tables: spec.Tables, + SkipTables: spec.SkipTables, + Destinations: spec.Destinations, + Concurrency: uint64(spec.Concurrency), DetrministicCqId: spec.DeterministicCQID, }, } switch spec.Scheduler { case specs.SchedulerDFS: - newSpec.SyncSpec.Scheduler = pbPlugin.SyncSpec_SCHEDULER_DFS + newSpec.SyncSpec.Scheduler = pbPlugin.SyncSpec_SCHEDULER_DFS case specs.SchedulerRoundRobin: - newSpec.SyncSpec.Scheduler = pbPlugin.SyncSpec_SCHEDULER_ROUND_ROBIN + newSpec.SyncSpec.Scheduler = pbPlugin.SyncSpec_SCHEDULER_ROUND_ROBIN default: panic("invalid scheduler " + spec.Scheduler.String()) } @@ -31,47 +31,47 @@ func SourceSpecV1ToV3(spec specs.Source) pbPlugin.Spec { func SpecV1ToV3(spec specs.Destination) pbPlugin.Spec { newSpec := pbPlugin.Spec{ - Name: spec.Name, + Name: spec.Name, Version: spec.Version, - Path: spec.Path, + Path: spec.Path, WriteSpec: &pbPlugin.WriteSpec{ - BatchSize: uint64(spec.BatchSize), + BatchSize: uint64(spec.BatchSize), BatchSizeBytes: uint64(spec.BatchSizeBytes), }, } switch spec.Registry { case specs.RegistryGithub: - newSpec.Registry = pbPlugin.Spec_REGISTRY_GITHUB + newSpec.Registry = pbPlugin.Spec_REGISTRY_GITHUB case specs.RegistryGrpc: - newSpec.Registry = pbPlugin.Spec_REGISTRY_GRPC + newSpec.Registry = pbPlugin.Spec_REGISTRY_GRPC case specs.RegistryLocal: - newSpec.Registry = pbPlugin.Spec_REGISTRY_LOCAL + newSpec.Registry = pbPlugin.Spec_REGISTRY_LOCAL default: panic("invalid registry " + spec.Registry.String()) } switch spec.WriteMode { case specs.WriteModeAppend: - newSpec.WriteSpec.WriteMode = pbPlugin.WRITE_MODE_WRITE_MODE_APPEND + newSpec.WriteSpec.WriteMode = pbPlugin.WRITE_MODE_WRITE_MODE_APPEND case specs.WriteModeOverwrite: - newSpec.WriteSpec.WriteMode = pbPlugin.WRITE_MODE_WRITE_MODE_OVERWRITE + newSpec.WriteSpec.WriteMode = pbPlugin.WRITE_MODE_WRITE_MODE_OVERWRITE case specs.WriteModeOverwriteDeleteStale: - newSpec.WriteSpec.WriteMode = pbPlugin.WRITE_MODE_WRITE_MODE_OVERWRITE_DELETE_STALE + newSpec.WriteSpec.WriteMode = pbPlugin.WRITE_MODE_WRITE_MODE_OVERWRITE_DELETE_STALE default: panic("invalid write mode " + spec.WriteMode.String()) } switch spec.PKMode { case specs.PKModeDefaultKeys: - newSpec.WriteSpec.PkMode = pbPlugin.WriteSpec_DEFAULT + newSpec.WriteSpec.PkMode = pbPlugin.WriteSpec_DEFAULT case specs.PKModeCQID: - newSpec.WriteSpec.PkMode = pbPlugin.WriteSpec_CQ_ID_ONLY + newSpec.WriteSpec.PkMode = pbPlugin.WriteSpec_CQ_ID_ONLY } switch spec.MigrateMode { case specs.MigrateModeSafe: - newSpec.WriteSpec.MigrateMode = pbPlugin.WriteSpec_SAFE + newSpec.WriteSpec.MigrateMode = pbPlugin.WriteSpec_SAFE case specs.MigrateModeForced: - newSpec.WriteSpec.MigrateMode = pbPlugin.WriteSpec_FORCE + newSpec.WriteSpec.MigrateMode = pbPlugin.WriteSpec_FORCE default: panic("invalid migrate mode " + spec.MigrateMode.String()) } return newSpec -} \ No newline at end of file +} diff --git a/plugin/managed_writer.go b/plugin/managed_writer.go index 74092f785e..3ed8d26903 100644 --- a/plugin/managed_writer.go +++ b/plugin/managed_writer.go @@ -165,4 +165,4 @@ func (p *Plugin) writeManagedTableBatch(ctx context.Context, _ specs.Source, tab } p.workersLock.Unlock() return nil -} \ No newline at end of file +} diff --git a/internal/memdb/memdb.go b/plugin/memdb.go similarity index 84% rename from internal/memdb/memdb.go rename to plugin/memdb.go index c84c32255e..e13da4a9c0 100644 --- a/internal/memdb/memdb.go +++ b/plugin/memdb.go @@ -1,17 +1,14 @@ -package memdb +package plugin import ( "context" "fmt" - "os" "sync" - "testing" "time" "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/array" pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" - "github.com/cloudquery/plugin-sdk/v4/plugin" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/rs/zerolog" ) @@ -26,21 +23,21 @@ type client struct { blockingWrite bool } -type Option func(*client) +type MemDBOption func(*client) -func WithErrOnWrite() Option { +func WithErrOnWrite() MemDBOption { return func(c *client) { c.errOnWrite = true } } -func WithBlockingWrite() Option { +func WithBlockingWrite() MemDBOption { return func(c *client) { c.blockingWrite = true } } -func GetNewClient(options ...Option) plugin.NewClientFunc { +func GetNewClient(options ...MemDBOption) NewClientFunc { c := &client{ memoryDB: make(map[string][]arrow.Record), memoryDBLock: sync.RWMutex{}, @@ -48,20 +45,12 @@ func GetNewClient(options ...Option) plugin.NewClientFunc { for _, opt := range options { opt(c) } - return func(context.Context, zerolog.Logger, pbPlugin.Spec) (plugin.Client, error) { + return func(context.Context, zerolog.Logger, pbPlugin.Spec) (Client, error) { return c, nil } } -func getTestLogger(t *testing.T) zerolog.Logger { - t.Helper() - zerolog.TimeFieldFormat = zerolog.TimeFormatUnixMs - return zerolog.New(zerolog.NewTestWriter(t)).Output( - zerolog.ConsoleWriter{Out: os.Stderr, TimeFormat: time.StampMicro}, - ).Level(zerolog.DebugLevel).With().Timestamp().Logger() -} - -func NewClient(_ context.Context, _ zerolog.Logger, spec pbPlugin.Spec) (plugin.Client, error) { +func NewMemDBClient(_ context.Context, _ zerolog.Logger, spec pbPlugin.Spec) (Client, error) { return &client{ memoryDB: make(map[string][]arrow.Record), tables: make(map[string]*schema.Table), @@ -69,7 +58,7 @@ func NewClient(_ context.Context, _ zerolog.Logger, spec pbPlugin.Spec) (plugin. }, nil } -func NewClientErrOnNew(context.Context, zerolog.Logger, pbPlugin.Spec) (plugin.Client, error) { +func NewMemDBClientErrOnNew(context.Context, zerolog.Logger, pbPlugin.Spec) (Client, error) { return nil, fmt.Errorf("newTestDestinationMemDBClientErrOnNew") } @@ -98,7 +87,7 @@ func (c *client) ID() string { return "testDestinationMemDB" } -func (c *client) Sync(ctx context.Context, metrics *plugin.Metrics, res chan<- arrow.Record) error { +func (c *client) Sync(ctx context.Context, metrics *Metrics, res chan<- arrow.Record) error { c.memoryDBLock.RLock() for tableName := range c.memoryDB { for _, row := range c.memoryDB[tableName] { @@ -209,8 +198,8 @@ func (c *client) WriteTableBatch(ctx context.Context, table *schema.Table, resou return nil } -func (*client) Metrics() plugin.Metrics { - return plugin.Metrics{} +func (*client) Metrics() Metrics { + return Metrics{} } func (c *client) Close(context.Context) error { @@ -239,4 +228,4 @@ func (c *client) deleteStaleTable(_ context.Context, table *schema.Table, source } } c.memoryDB[tableName] = filteredTable -} \ No newline at end of file +} diff --git a/internal/memdb/memdb_test.go b/plugin/memdb_test.go similarity index 71% rename from internal/memdb/memdb_test.go rename to plugin/memdb_test.go index b5196d45f3..6b8b78ab9e 100644 --- a/internal/memdb/memdb_test.go +++ b/plugin/memdb_test.go @@ -1,4 +1,4 @@ -package memdb +package plugin import ( "context" @@ -7,14 +7,13 @@ import ( "github.com/apache/arrow/go/v13/arrow" pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" - "github.com/cloudquery/plugin-sdk/v4/plugin" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/google/uuid" "github.com/rs/zerolog" "github.com/stretchr/testify/require" ) -var migrateStrategyOverwrite = plugin.MigrateStrategy{ +var migrateStrategyOverwrite = MigrateStrategy{ AddColumn: pbPlugin.WriteSpec_FORCE, AddColumnNotNull: pbPlugin.WriteSpec_FORCE, RemoveColumn: pbPlugin.WriteSpec_FORCE, @@ -22,7 +21,7 @@ var migrateStrategyOverwrite = plugin.MigrateStrategy{ ChangeColumn: pbPlugin.WriteSpec_FORCE, } -var migrateStrategyAppend = plugin.MigrateStrategy{ +var migrateStrategyAppend = MigrateStrategy{ AddColumn: pbPlugin.WriteSpec_FORCE, AddColumnNotNull: pbPlugin.WriteSpec_FORCE, RemoveColumn: pbPlugin.WriteSpec_FORCE, @@ -31,13 +30,13 @@ var migrateStrategyAppend = plugin.MigrateStrategy{ } func TestPluginUnmanagedClient(t *testing.T) { - plugin.PluginTestSuiteRunner( + PluginTestSuiteRunner( t, - func() *plugin.Plugin { - return plugin.NewPlugin("test", "development", NewClient) + func() *Plugin { + return NewPlugin("test", "development", NewMemDBClient) }, pbPlugin.Spec{}, - plugin.PluginTestSuiteTests{ + PluginTestSuiteTests{ MigrateStrategyOverwrite: migrateStrategyOverwrite, MigrateStrategyAppend: migrateStrategyAppend, }, @@ -45,55 +44,55 @@ func TestPluginUnmanagedClient(t *testing.T) { } func TestPluginManagedClient(t *testing.T) { - plugin.PluginTestSuiteRunner(t, - func() *plugin.Plugin { - return plugin.NewPlugin("test", "development", NewClient, plugin.WithManagedWriter()) + PluginTestSuiteRunner(t, + func() *Plugin { + return NewPlugin("test", "development", NewMemDBClient, WithManagedWriter()) }, pbPlugin.Spec{}, - plugin.PluginTestSuiteTests{ + PluginTestSuiteTests{ MigrateStrategyOverwrite: migrateStrategyOverwrite, MigrateStrategyAppend: migrateStrategyAppend, }) } func TestPluginManagedClientWithSmallBatchSize(t *testing.T) { - plugin.PluginTestSuiteRunner(t, - func() *plugin.Plugin { - return plugin.NewPlugin("test", "development", NewClient, plugin.WithManagedWriter(), - plugin.WithDefaultBatchSize(1), - plugin.WithDefaultBatchSizeBytes(1)) + PluginTestSuiteRunner(t, + func() *Plugin { + return NewPlugin("test", "development", NewMemDBClient, WithManagedWriter(), + WithDefaultBatchSize(1), + WithDefaultBatchSizeBytes(1)) }, pbPlugin.Spec{}, - plugin.PluginTestSuiteTests{ + PluginTestSuiteTests{ MigrateStrategyOverwrite: migrateStrategyOverwrite, MigrateStrategyAppend: migrateStrategyAppend, }) } func TestPluginManagedClientWithLargeBatchSize(t *testing.T) { - plugin.PluginTestSuiteRunner(t, - func() *plugin.Plugin { - return plugin.NewPlugin("test", "development", NewClient, plugin.WithManagedWriter(), - plugin.WithDefaultBatchSize(100000000), - plugin.WithDefaultBatchSizeBytes(100000000)) + PluginTestSuiteRunner(t, + func() *Plugin { + return NewPlugin("test", "development", NewMemDBClient, WithManagedWriter(), + WithDefaultBatchSize(100000000), + WithDefaultBatchSizeBytes(100000000)) }, pbPlugin.Spec{}, - plugin.PluginTestSuiteTests{ + PluginTestSuiteTests{ MigrateStrategyOverwrite: migrateStrategyOverwrite, MigrateStrategyAppend: migrateStrategyAppend, }) } func TestPluginManagedClientWithCQPKs(t *testing.T) { - plugin.PluginTestSuiteRunner(t, - func() *plugin.Plugin { - return plugin.NewPlugin("test", "development", NewClient) + PluginTestSuiteRunner(t, + func() *Plugin { + return NewPlugin("test", "development", NewMemDBClient) }, pbPlugin.Spec{ WriteSpec: &pbPlugin.WriteSpec{ PkMode: pbPlugin.WriteSpec_CQ_ID_ONLY, }, }, - plugin.PluginTestSuiteTests{ + PluginTestSuiteTests{ MigrateStrategyOverwrite: migrateStrategyOverwrite, MigrateStrategyAppend: migrateStrategyAppend, }) @@ -101,7 +100,7 @@ func TestPluginManagedClientWithCQPKs(t *testing.T) { func TestPluginOnNewError(t *testing.T) { ctx := context.Background() - p := plugin.NewPlugin("test", "development", NewClientErrOnNew) + p := NewPlugin("test", "development", NewMemDBClientErrOnNew) err := p.Init(ctx, pbPlugin.Spec{}) if err == nil { @@ -112,7 +111,7 @@ func TestPluginOnNewError(t *testing.T) { func TestOnWriteError(t *testing.T) { ctx := context.Background() newClientFunc := GetNewClient(WithErrOnWrite()) - p := plugin.NewPlugin("test", "development", newClientFunc) + p := NewPlugin("test", "development", newClientFunc) if err := p.Init(ctx, pbPlugin.Spec{}); err != nil { t.Fatal(err) } @@ -147,7 +146,7 @@ func TestOnWriteError(t *testing.T) { func TestOnWriteCtxCancelled(t *testing.T) { ctx := context.Background() newClientFunc := GetNewClient(WithBlockingWrite()) - p := plugin.NewPlugin("test", "development", newClientFunc) + p := NewPlugin("test", "development", newClientFunc) if err := p.Init(ctx, pbPlugin.Spec{}); err != nil { t.Fatal(err) } @@ -187,19 +186,21 @@ func TestPluginInit(t *testing.T) { batchSizeObserved uint64 batchSizeBytesObserved uint64 ) - p := plugin.NewPlugin( + p := NewPlugin( "test", "development", - func(ctx context.Context, logger zerolog.Logger, s pbPlugin.Spec) (plugin.Client, error) { + func(ctx context.Context, logger zerolog.Logger, s pbPlugin.Spec) (Client, error) { batchSizeObserved = s.WriteSpec.BatchSize batchSizeBytesObserved = s.WriteSpec.BatchSizeBytes - return NewClient(ctx, logger, s) + return NewMemDBClient(ctx, logger, s) }, - plugin.WithDefaultBatchSize(batchSize), - plugin.WithDefaultBatchSizeBytes(batchSizeBytes), + WithDefaultBatchSize(batchSize), + WithDefaultBatchSizeBytes(batchSizeBytes), ) - require.NoError(t, p.Init(context.TODO(), pbPlugin.Spec{})) + require.NoError(t, p.Init(context.TODO(), pbPlugin.Spec{ + WriteSpec: &pbPlugin.WriteSpec{}, + })) require.Equal(t, batchSize, batchSizeObserved) require.Equal(t, batchSizeBytes, batchSizeBytesObserved) -} \ No newline at end of file +} diff --git a/plugin/nulls.go b/plugin/nulls.go index 12ad0facf7..02d80a5f1c 100644 --- a/plugin/nulls.go +++ b/plugin/nulls.go @@ -69,8 +69,4 @@ func (f AllowNullFunc) replaceNullsByEmpty(records []arrow.Record) { } records[i] = array.NewRecord(records[i].Schema(), cols, records[i].NumRows()) } -<<<<<<< HEAD:plugins/destination/nulls.go -} -======= -} ->>>>>>> 5ba1713 (wip):plugin/nulls.go +} \ No newline at end of file diff --git a/plugin/options.go b/plugin/options.go index d3104875e7..2a226724e0 100644 --- a/plugin/options.go +++ b/plugin/options.go @@ -45,7 +45,6 @@ func WithStaticTables(tables schema.Tables) Option { } } - func WithManagedWriter() Option { return func(p *Plugin) { p.managedWriter = true @@ -68,4 +67,4 @@ func WithDefaultBatchSizeBytes(defaultBatchSizeBytes int) Option { return func(p *Plugin) { p.defaultBatchSizeBytes = defaultBatchSizeBytes } -} \ No newline at end of file +} diff --git a/plugin/plugin.go b/plugin/plugin.go index e812cf941c..a4847ca007 100644 --- a/plugin/plugin.go +++ b/plugin/plugin.go @@ -49,6 +49,10 @@ func (UnimplementedWriter) Write(ctx context.Context, tables schema.Tables, res return fmt.Errorf("not implemented") } +func (UnimplementedWriter) WriteTableBatch(ctx context.Context, table *schema.Table, data []arrow.Record) error { + return fmt.Errorf("not implemented") +} + func (UnimplementedWriter) DeleteStale(ctx context.Context, tables schema.Tables, sourceName string, syncTime time.Time) error { return fmt.Errorf("not implemented") } @@ -111,8 +115,8 @@ type Plugin struct { syncTime time.Time managedWriter bool - workers map[string]*worker - workersLock *sync.Mutex + workers map[string]*worker + workersLock *sync.Mutex batchTimeout time.Duration defaultBatchSize int @@ -310,7 +314,7 @@ func (p *Plugin) Init(ctx context.Context, spec pbPlugin.Spec) error { if p.maxDepth > maxAllowedDepth { return fmt.Errorf("max depth of tables is %d, max allowed is %d", p.maxDepth, maxAllowedDepth) } - } else { + } else if tables != nil { tables, err = tables.FilterDfs(spec.SyncSpec.Tables, spec.SyncSpec.SkipTables, true) if err != nil { return fmt.Errorf("failed to filter tables: %w", err) diff --git a/plugin/plugin_managed_source_test.go b/plugin/plugin_managed_source_test.go index 159c7dd9c8..cdaf02e616 100644 --- a/plugin/plugin_managed_source_test.go +++ b/plugin/plugin_managed_source_test.go @@ -8,7 +8,7 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/array" - pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v0" + pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" "github.com/cloudquery/plugin-sdk/v4/scalar" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/google/go-cmp/cmp" diff --git a/plugin/plugin_round_robin_test.go b/plugin/plugin_round_robin_test.go index 64b6472387..e24c15d108 100644 --- a/plugin/plugin_round_robin_test.go +++ b/plugin/plugin_round_robin_test.go @@ -2,169 +2,17 @@ package plugin import ( "context" - "fmt" - "sync" "testing" "time" - "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/array" - pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v0" + pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" "github.com/cloudquery/plugin-sdk/v4/schema" - "github.com/rs/zerolog" ) -type testPluginClient struct { - memoryDB map[string][]arrow.Record - tables map[string]*schema.Table - spec pbPlugin.Spec - memoryDBLock sync.RWMutex -} - -type testPluginSpec struct { - ConnectionString string `json:"connection_string"` -} - -func (c *testPluginClient) ID() string { - return "test-plugin" -} - -func (c *testPluginClient) Sync(ctx context.Context, metrics *Metrics, res chan<- arrow.Record) error { - c.memoryDBLock.RLock() - for tableName := range c.memoryDB { - for _, row := range c.memoryDB[tableName] { - res <- row - } - } - c.memoryDBLock.RUnlock() - return nil -} - -func (c *testPluginClient) Migrate(ctx context.Context, tables schema.Tables) error { - for _, table := range tables { - tableName := table.Name - memTable := c.memoryDB[tableName] - if memTable == nil { - c.memoryDB[tableName] = make([]arrow.Record, 0) - c.tables[tableName] = table - continue - } - - changes := table.GetChanges(c.tables[tableName]) - // memdb doesn't support any auto-migrate - if changes == nil { - continue - } - c.memoryDB[tableName] = make([]arrow.Record, 0) - c.tables[tableName] = table - } - return nil -} - -func (c *testPluginClient) Write(ctx context.Context, tables schema.Tables, resources <-chan arrow.Record) error { - for resource := range resources { - c.memoryDBLock.Lock() - sc := resource.Schema() - tableName, ok := sc.Metadata().GetValue(schema.MetadataTableName) - if !ok { - return fmt.Errorf("table name not found in schema metadata") - } - table := c.tables[tableName] - if c.spec.WriteSpec.WriteMode == pbPlugin.WRITE_MODE_WRITE_MODE_APPEND { - c.memoryDB[tableName] = append(c.memoryDB[tableName], resource) - } else { - c.overwrite(table, resource) - } - c.memoryDBLock.Unlock() - } - return nil -} - -func (c *testPluginClient) overwrite(table *schema.Table, data arrow.Record) { - pksIndex := table.PrimaryKeysIndexes() - tableName := table.Name - for i, row := range c.memoryDB[tableName] { - found := true - for _, pkIndex := range pksIndex { - s1 := data.Column(pkIndex).String() - s2 := row.Column(pkIndex).String() - if s1 != s2 { - found = false - } - } - if found { - c.memoryDB[tableName] = append(c.memoryDB[tableName][:i], c.memoryDB[tableName][i+1:]...) - c.memoryDB[tableName] = append(c.memoryDB[tableName], data) - return - } - } - c.memoryDB[tableName] = append(c.memoryDB[tableName], data) -} - -func (c *testPluginClient) deleteStaleTable(_ context.Context, table *schema.Table, source string, syncTime time.Time) { - sourceColIndex := table.Columns.Index(schema.CqSourceNameColumn.Name) - syncColIndex := table.Columns.Index(schema.CqSyncTimeColumn.Name) - tableName := table.Name - var filteredTable []arrow.Record - for i, row := range c.memoryDB[tableName] { - if row.Column(sourceColIndex).(*array.String).Value(0) == source { - rowSyncTime := row.Column(syncColIndex).(*array.Timestamp).Value(0).ToTime(arrow.Microsecond).UTC() - if !rowSyncTime.Before(syncTime) { - filteredTable = append(filteredTable, c.memoryDB[tableName][i]) - } - } - } - c.memoryDB[tableName] = filteredTable -} - -func (c *testPluginClient) DeleteStale(ctx context.Context, tables schema.Tables, sourceName string, syncTime time.Time) error { - for _, table := range tables { - c.deleteStaleTable(ctx, table, sourceName, syncTime) - } - return nil -} - -func (c *testPluginClient) Close(ctx context.Context) error { - c.memoryDB = nil - return nil -} - -func (c *testPluginClient) Read(ctx context.Context, table *schema.Table, sourceName string, res chan<- arrow.Record) error { - tableName := table.Name - if c.memoryDB[tableName] == nil { - return nil - } - sourceColIndex := table.Columns.Index(schema.CqSourceNameColumn.Name) - if sourceColIndex == -1 { - return fmt.Errorf("table %s doesn't have source column", tableName) - } - var sortedRes []arrow.Record - c.memoryDBLock.RLock() - for _, row := range c.memoryDB[tableName] { - arr := row.Column(sourceColIndex) - if arr.(*array.String).Value(0) == sourceName { - sortedRes = append(sortedRes, row) - } - } - c.memoryDBLock.RUnlock() - - for _, row := range sortedRes { - res <- row - } - return nil -} - -func NewTestPluginClient(ctx context.Context, logger zerolog.Logger, spec pbPlugin.Spec) (Client, error) { - return &testPluginClient{ - memoryDB: make(map[string][]arrow.Record), - tables: make(map[string]*schema.Table), - spec: spec, - }, nil -} - func TestPluginRoundRobin(t *testing.T) { ctx := context.Background() - p := NewPlugin("test", "v0.0.0", NewTestPluginClient, WithUnmanaged()) + p := NewPlugin("test", "v0.0.0", NewMemDBClient, WithUnmanaged()) testTable := schema.TestTable("test_table", schema.TestSourceOptions{}) syncTime := time.Now().UTC() testRecords := schema.GenTestData(testTable, schema.GenTestDataOptions{ @@ -227,4 +75,4 @@ func TestPluginRoundRobin(t *testing.T) { if err := p.Close(ctx); err != nil { t.Fatal(err) } -} \ No newline at end of file +} diff --git a/plugin/testing_overwrite_deletestale.go b/plugin/testing_overwrite_deletestale.go index 788decd8a4..3b2266d080 100644 --- a/plugin/testing_overwrite_deletestale.go +++ b/plugin/testing_overwrite_deletestale.go @@ -33,8 +33,11 @@ func (*PluginTestSuite) destinationPluginTestWriteOverwriteDeleteStale(ctx conte sourceName := "testOverwriteSource" + uuid.NewString() sourceSpec := pbPlugin.Spec{ - Name: sourceName, - // Backend: specs.BackendLocal, + Name: sourceName, + BackendSpec: &pbPlugin.Spec{ + Name: "local", + Path: "cloudquery/local", + }, } opts := schema.GenTestDataOptions{ @@ -150,4 +153,4 @@ func (*PluginTestSuite) destinationPluginTestWriteOverwriteDeleteStale(ctx conte } return nil -} \ No newline at end of file +} diff --git a/plugin/testing_write.go b/plugin/testing_write.go index 17fc3f6100..8f2b3da285 100644 --- a/plugin/testing_write.go +++ b/plugin/testing_write.go @@ -170,7 +170,9 @@ func WithTestSourceSkipDecimals() func(o *PluginTestSuiteRunnerOptions) { func PluginTestSuiteRunner(t *testing.T, newPlugin NewPluginFunc, destSpec pbPlugin.Spec, tests PluginTestSuiteTests, testOptions ...func(o *PluginTestSuiteRunnerOptions)) { t.Helper() destSpec.Name = "testsuite" - + if destSpec.WriteSpec == nil { + destSpec.WriteSpec = &pbPlugin.WriteSpec{} + } suite := &PluginTestSuite{ tests: tests, } @@ -291,4 +293,4 @@ func sortRecordsBySyncTime(table *schema.Table, records []arrow.Record) { } return first.Before(second) }) -} \ No newline at end of file +} diff --git a/plugin/testing_write_append.go b/plugin/testing_write_append.go index d56d20287e..4720431062 100644 --- a/plugin/testing_write_append.go +++ b/plugin/testing_write_append.go @@ -93,4 +93,4 @@ func (s *PluginTestSuite) destinationPluginTestWriteAppend(ctx context.Context, } return nil -} \ No newline at end of file +} diff --git a/plugin/testing_write_migrate.go b/plugin/testing_write_migrate.go index d0c8b54ea8..9ac2021866 100644 --- a/plugin/testing_write_migrate.go +++ b/plugin/testing_write_migrate.go @@ -281,4 +281,4 @@ func (*PluginTestSuite) destinationPluginTestMigrate( require.NoError(t, p.Init(ctx, nonForced)) require.NoError(t, p.Migrate(ctx, schema.Tables{table})) }) -} \ No newline at end of file +} diff --git a/plugin/testing_write_overwrite.go b/plugin/testing_write_overwrite.go index a7dba53037..12c8400053 100644 --- a/plugin/testing_write_overwrite.go +++ b/plugin/testing_write_overwrite.go @@ -109,4 +109,4 @@ func (*PluginTestSuite) destinationPluginTestWriteOverwrite(ctx context.Context, } return nil -} \ No newline at end of file +} diff --git a/serve/plugin.go b/serve/plugin.go index 2fe9774262..0b0e1de290 100644 --- a/serve/plugin.go +++ b/serve/plugin.go @@ -33,9 +33,9 @@ import ( ) type pluginServe struct { - plugin *plugin.Plugin + plugin *plugin.Plugin destinationV0V1Server bool - sentryDSN string + sentryDSN string } type PluginOption func(*pluginServe) From 89aa95b43b43c2521470ade968c38645eca08f95 Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Wed, 31 May 2023 15:41:33 +0300 Subject: [PATCH 004/125] wip --- internal/servers/plugin/v3/plugin.go | 14 +++-- plugin/docs.go | 4 +- plugin/{docs_test.go.backup => docs_test.go} | 7 ++- plugin/memdb_test.go | 16 +++-- plugin/plugin.go | 44 ++++++++++--- ..._test.go => destination_v0_test.go.backup} | 0 serve/destination_v1_test.go | 32 +++------- serve/plugin.go | 63 ++++++++++++------- serve/plugin_test.go | 35 ++--------- 9 files changed, 117 insertions(+), 98 deletions(-) rename plugin/{docs_test.go.backup => docs_test.go} (92%) rename serve/{destination_v0_test.go => destination_v0_test.go.backup} (100%) diff --git a/internal/servers/plugin/v3/plugin.go b/internal/servers/plugin/v3/plugin.go index 8a117bee9f..29a58f1fce 100644 --- a/internal/servers/plugin/v3/plugin.go +++ b/internal/servers/plugin/v3/plugin.go @@ -45,9 +45,11 @@ func (s *Server) GetStaticTables(context.Context, *pb.GetStaticTables_Request) ( } func (s *Server) GetDynamicTables(context.Context, *pb.GetDynamicTables_Request) (*pb.GetDynamicTables_Response, error) { - // TODO: Fix this - tables := s.Plugin.StaticTables().ToArrowSchemas() - encoded, err := tables.Encode() + tables := s.Plugin.DynamicTables() + if tables == nil { + return &pb.GetDynamicTables_Response{}, nil + } + encoded, err := tables.ToArrowSchemas().Encode() if err != nil { return nil, fmt.Errorf("failed to encode tables: %w", err) } @@ -81,6 +83,10 @@ func (s *Server) Sync(req *pb.Sync_Request, stream pb.Plugin_SyncServer) error { var syncErr error ctx := stream.Context() + if req.SyncSpec == nil { + req.SyncSpec = &pb.SyncSpec{} + } + go func() { defer close(records) err := s.Plugin.Sync(ctx, req.SyncTime.AsTime(), *req.SyncSpec, records) @@ -229,7 +235,7 @@ func (s *Server) Write(msg pb.Plugin_WriteServer) error { func (s *Server) GenDocs(req *pb.GenDocs_Request, srv pb.Plugin_GenDocsServer) error { tmpDir := os.TempDir() defer os.RemoveAll(tmpDir) - err := s.Plugin.GeneratePluginDocs(s.Plugin.StaticTables(), tmpDir, req.Format) + err := s.Plugin.GeneratePluginDocs(tmpDir, req.Format) if err != nil { return fmt.Errorf("failed to generate docs: %w", err) } diff --git a/plugin/docs.go b/plugin/docs.go index e66bf7ebb2..b100ea649c 100644 --- a/plugin/docs.go +++ b/plugin/docs.go @@ -79,11 +79,11 @@ type templateData struct { } // GeneratePluginDocs creates table documentation for the source plugin based on its list of tables -func (p *Plugin) GeneratePluginDocs(tables schema.Tables, dir string, format pbPlugin.GenDocs_FORMAT) error { +func (p *Plugin) GeneratePluginDocs(dir string, format pbPlugin.GenDocs_FORMAT) error { if err := os.MkdirAll(dir, os.ModePerm); err != nil { return err } - + tables := p.staticTables setDestinationManagedCqColumns(tables) sortedTables := make(schema.Tables, 0, len(tables)) diff --git a/plugin/docs_test.go.backup b/plugin/docs_test.go similarity index 92% rename from plugin/docs_test.go.backup rename to plugin/docs_test.go index 06f271f9fd..878e006e88 100644 --- a/plugin/docs_test.go.backup +++ b/plugin/docs_test.go @@ -9,6 +9,7 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/bradleyjkemp/cupaloy/v2" + pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/cloudquery/plugin-sdk/v4/types" "github.com/stretchr/testify/require" @@ -120,14 +121,14 @@ var testTables = []*schema.Table{ } func TestGeneratePluginDocs(t *testing.T) { - p := NewPlugin("test", "v1.0.0", testTables, newTestExecutionClient) + p := NewPlugin("test", "v1.0.0", newTestExecutionClient, WithStaticTables(testTables)) cup := cupaloy.New(cupaloy.SnapshotSubdirectory("testdata")) t.Run("Markdown", func(t *testing.T) { tmpdir := t.TempDir() - err := p.GeneratePluginDocs(tmpdir, "markdown") + err := p.GeneratePluginDocs(tmpdir, pbPlugin.GenDocs_FORMAT_MARKDOWN) if err != nil { t.Fatalf("unexpected error calling GeneratePluginDocs: %v", err) } @@ -146,7 +147,7 @@ func TestGeneratePluginDocs(t *testing.T) { t.Run("JSON", func(t *testing.T) { tmpdir := t.TempDir() - err := p.GeneratePluginDocs(tmpdir, "json") + err := p.GeneratePluginDocs(tmpdir, pbPlugin.GenDocs_FORMAT_JSON) if err != nil { t.Fatalf("unexpected error calling GeneratePluginDocs: %v", err) } diff --git a/plugin/memdb_test.go b/plugin/memdb_test.go index 6b8b78ab9e..4cfa954f1f 100644 --- a/plugin/memdb_test.go +++ b/plugin/memdb_test.go @@ -112,7 +112,9 @@ func TestOnWriteError(t *testing.T) { ctx := context.Background() newClientFunc := GetNewClient(WithErrOnWrite()) p := NewPlugin("test", "development", newClientFunc) - if err := p.Init(ctx, pbPlugin.Spec{}); err != nil { + if err := p.Init(ctx, pbPlugin.Spec{ + WriteSpec: &pbPlugin.WriteSpec{}, + }); err != nil { t.Fatal(err) } table := schema.TestTable("test", schema.TestSourceOptions{}) @@ -147,7 +149,9 @@ func TestOnWriteCtxCancelled(t *testing.T) { ctx := context.Background() newClientFunc := GetNewClient(WithBlockingWrite()) p := NewPlugin("test", "development", newClientFunc) - if err := p.Init(ctx, pbPlugin.Spec{}); err != nil { + if err := p.Init(ctx, pbPlugin.Spec{ + WriteSpec: &pbPlugin.WriteSpec{}, + }); err != nil { t.Fatal(err) } table := schema.TestTable("test", schema.TestSourceOptions{}) @@ -178,8 +182,8 @@ func TestOnWriteCtxCancelled(t *testing.T) { func TestPluginInit(t *testing.T) { const ( - batchSize = 100 - batchSizeBytes = 1000 + batchSize = uint64(100) + batchSizeBytes = uint64(1000) ) var ( @@ -194,8 +198,8 @@ func TestPluginInit(t *testing.T) { batchSizeBytesObserved = s.WriteSpec.BatchSizeBytes return NewMemDBClient(ctx, logger, s) }, - WithDefaultBatchSize(batchSize), - WithDefaultBatchSizeBytes(batchSizeBytes), + WithDefaultBatchSize(int(batchSize)), + WithDefaultBatchSizeBytes(int(batchSizeBytes)), ) require.NoError(t, p.Init(context.TODO(), pbPlugin.Spec{ WriteSpec: &pbPlugin.WriteSpec{}, diff --git a/plugin/plugin.go b/plugin/plugin.go index a4847ca007..dc254f0827 100644 --- a/plugin/plugin.go +++ b/plugin/plugin.go @@ -20,6 +20,12 @@ import ( pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" ) +const ( + defaultBatchTimeoutSeconds = 20 + defaultBatchSize = 10000 + defaultBatchSizeBytes = 5 * 1024 * 1024 // 5 MiB +) + type Options struct { Backend backend.Backend } @@ -193,13 +199,17 @@ func maxDepth(tables schema.Tables) uint64 { func NewPlugin(name string, version string, newClient NewClientFunc, options ...Option) *Plugin { p := Plugin{ - name: name, - version: version, - internalColumns: true, - caser: caser.New(), - titleTransformer: DefaultTitleTransformer, - newClient: newClient, - metrics: &Metrics{TableClient: make(map[string]map[string]*TableClientMetrics)}, + name: name, + version: version, + internalColumns: true, + caser: caser.New(), + titleTransformer: DefaultTitleTransformer, + newClient: newClient, + metrics: &Metrics{TableClient: make(map[string]map[string]*TableClientMetrics)}, + workersLock: &sync.Mutex{}, + batchTimeout: time.Duration(defaultBatchTimeoutSeconds) * time.Second, + defaultBatchSize: defaultBatchSize, + defaultBatchSizeBytes: defaultBatchSizeBytes, } for _, opt := range options { opt(&p) @@ -272,12 +282,30 @@ func (p *Plugin) Metrics() *Metrics { return p.metrics } +func (p *Plugin) setSpecDefaults(spec *pbPlugin.Spec) { + if spec.WriteSpec == nil { + spec.WriteSpec = &pbPlugin.WriteSpec{ + BatchSize: uint64(p.defaultBatchSize), + BatchSizeBytes: uint64(p.defaultBatchSizeBytes), + } + } + if spec.WriteSpec.BatchSize == 0 { + spec.WriteSpec.BatchSize = uint64(p.defaultBatchSize) + } + if spec.WriteSpec.BatchSizeBytes == 0 { + spec.WriteSpec.BatchSizeBytes = uint64(p.defaultBatchSizeBytes) + } + if spec.SyncSpec == nil { + spec.SyncSpec = &pbPlugin.SyncSpec{} + } +} + func (p *Plugin) Init(ctx context.Context, spec pbPlugin.Spec) error { if !p.mu.TryLock() { return fmt.Errorf("plugin already in use") } defer p.mu.Unlock() - + p.setSpecDefaults(&spec) var err error p.client, err = p.newClient(ctx, p.logger, spec) if err != nil { diff --git a/serve/destination_v0_test.go b/serve/destination_v0_test.go.backup similarity index 100% rename from serve/destination_v0_test.go rename to serve/destination_v0_test.go.backup diff --git a/serve/destination_v1_test.go b/serve/destination_v1_test.go index 0f55b90694..577fd8f9ff 100644 --- a/serve/destination_v1_test.go +++ b/serve/destination_v1_test.go @@ -13,9 +13,7 @@ import ( "github.com/apache/arrow/go/v13/arrow/ipc" pb "github.com/cloudquery/plugin-pb-go/pb/destination/v1" "github.com/cloudquery/plugin-pb-go/specs" - "github.com/cloudquery/plugin-sdk/v4/internal/memdb" "github.com/cloudquery/plugin-sdk/v4/plugin" - "github.com/cloudquery/plugin-sdk/v4/plugins/destination" "github.com/cloudquery/plugin-sdk/v4/schema" "google.golang.org/grpc" "google.golang.org/grpc/credentials/insecure" @@ -23,12 +21,8 @@ import ( ) func TestDestinationV1(t *testing.T) { - plugin := plugin.NewPlugin("testDestinationPlugin", "development", memdb.NewClient) - s := &pluginServe{ - plugin: plugin, - } - cmd := newCmdPluginRoot(s) - cmd.SetArgs([]string{"serve", "--network", "test"}) + p := plugin.NewPlugin("testDestinationPlugin", "development", plugin.NewMemDBClient) + srv := Plugin(p, WithArgs("serve"), WithDestinationV0V1Server(), WithTestListener()) ctx := context.Background() ctx, cancel := context.WithCancel(ctx) var wg sync.WaitGroup @@ -36,27 +30,15 @@ func TestDestinationV1(t *testing.T) { var serverErr error go func() { defer wg.Done() - serverErr = cmd.ExecuteContext(ctx) + serverErr = srv.Serve(ctx) }() defer func() { cancel() wg.Wait() }() - // wait for the server to start - for { - testPluginListenerLock.Lock() - if testPluginListener != nil { - testPluginListenerLock.Unlock() - break - } - testPluginListenerLock.Unlock() - t.Log("waiting for grpc server to start") - time.Sleep(time.Millisecond * 200) - } - // https://stackoverflow.com/questions/42102496/testing-a-grpc-service - conn, err := grpc.DialContext(ctx, "bufnet", grpc.WithContextDialer(bufDestinationDialer), grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithBlock()) + conn, err := grpc.DialContext(ctx, "bufnet", grpc.WithContextDialer(srv.bufPluginDialer), grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithBlock()) if err != nil { t.Fatalf("Failed to dial bufnet: %v", err) } @@ -148,7 +130,7 @@ func TestDestinationV1(t *testing.T) { } // serversDestination readCh := make(chan arrow.Record, 1) - if err := plugin.Read(ctx, table, sourceName, readCh); err != nil { + if err := p.Read(ctx, table, sourceName, readCh); err != nil { t.Fatal(err) } close(readCh) @@ -156,8 +138,8 @@ func TestDestinationV1(t *testing.T) { for resource := range readCh { totalResources++ if !array.RecordEqual(rec, resource) { - diff := destination.RecordDiff(rec, resource) - t.Fatalf("expected %v but got %v. Diff: %v", rec, resource, diff) + diff := plugin.RecordDiff(rec, resource) + t.Fatalf("diff at %d: %s", totalResources, diff) } } if totalResources != 1 { diff --git a/serve/plugin.go b/serve/plugin.go index 0b0e1de290..0adddb4b9c 100644 --- a/serve/plugin.go +++ b/serve/plugin.go @@ -1,15 +1,16 @@ package serve import ( + "context" "fmt" "net" "os" "os/signal" "strings" - "sync" "syscall" "github.com/cloudquery/plugin-sdk/v4/plugin" + "github.com/cloudquery/plugin-sdk/v4/types" pbDestinationV0 "github.com/cloudquery/plugin-pb-go/pb/destination/v0" pbDestinationV1 "github.com/cloudquery/plugin-pb-go/pb/destination/v1" @@ -34,8 +35,11 @@ import ( type pluginServe struct { plugin *plugin.Plugin + args []string destinationV0V1Server bool sentryDSN string + testListener bool + testListenerConn *bufconn.Listener } type PluginOption func(*pluginServe) @@ -54,28 +58,48 @@ func WithDestinationV0V1Server() PluginOption { } } -// lis used for unit testing grpc server and client -var testPluginListener *bufconn.Listener -var testPluginListenerLock sync.Mutex +// WithArgs used to serve the plugin with predefined args instead of os.Args +func WithArgs(args ...string) PluginOption { + return func(s *pluginServe) { + s.args = args + } +} + +// WithTestListener means that the plugin will be served with an in-memory listener +// available via testListener() method instead of a network listener. +func WithTestListener() PluginOption { + return func(s *pluginServe) { + s.testListener = true + s.testListenerConn = bufconn.Listen(testBufSize) + } +} const servePluginShort = `Start plugin server` -func Plugin(plugin *plugin.Plugin, opts ...PluginOption) { +func Plugin(plugin *plugin.Plugin, opts ...PluginOption) *pluginServe{ s := &pluginServe{ plugin: plugin, } for _, opt := range opts { opt(s) } - if err := newCmdPluginRoot(s).Execute(); err != nil { - sentry.CaptureMessage(err.Error()) - fmt.Println(err) - os.Exit(1) + return s +} + +func (s *pluginServe) bufPluginDialer(context.Context, string) (net.Conn, error) { + return s.testListenerConn.Dial() +} + +func (s *pluginServe) Serve(ctx context.Context) error { + types.RegisterAllExtensions() + cmd := s.newCmdPluginRoot() + if s.args != nil { + cmd.SetArgs(s.args) } + return cmd.ExecuteContext(ctx) } -// nolint:dupl -func newCmdPluginServe(serve *pluginServe) *cobra.Command { +func (serve *pluginServe) newCmdPluginServe() *cobra.Command { var address string var network string var noSentry bool @@ -107,11 +131,8 @@ func newCmdPluginServe(serve *pluginServe) *cobra.Command { // opts.Plugin.Logger = logger var listener net.Listener - if network == "test" { - testPluginListenerLock.Lock() - listener = bufconn.Listen(testBufSize) - testPluginListener = listener.(*bufconn.Listener) - testPluginListenerLock.Unlock() + if serve.testListener { + listener = serve.testListenerConn } else { listener, err = net.Listen(network, address) if err != nil { @@ -230,7 +251,7 @@ doc --format json . ` ) -func newCmdPluginDoc(serve *pluginServe) *cobra.Command { +func (serve *pluginServe) newCmdPluginDoc() *cobra.Command { format := newEnum([]string{"json", "markdown"}, "markdown") cmd := &cobra.Command{ Use: "doc ", @@ -239,19 +260,19 @@ func newCmdPluginDoc(serve *pluginServe) *cobra.Command { Args: cobra.ExactArgs(1), RunE: func(cmd *cobra.Command, args []string) error { pbFormat := pbv3.GenDocs_FORMAT(pbv3.GenDocs_FORMAT_value[format.Value]) - return serve.plugin.GeneratePluginDocs(serve.plugin.StaticTables(), args[0], pbFormat) + return serve.plugin.GeneratePluginDocs(args[0], pbFormat) }, } cmd.Flags().Var(format, "format", fmt.Sprintf("output format. one of: %s", strings.Join(format.Allowed, ","))) return cmd } -func newCmdPluginRoot(serve *pluginServe) *cobra.Command { +func (serve *pluginServe) newCmdPluginRoot() *cobra.Command { cmd := &cobra.Command{ Use: fmt.Sprintf("%s ", serve.plugin.Name()), } - cmd.AddCommand(newCmdPluginServe(serve)) - cmd.AddCommand(newCmdPluginDoc(serve)) + cmd.AddCommand(serve.newCmdPluginServe()) + cmd.AddCommand(serve.newCmdPluginDoc()) cmd.CompletionOptions.DisableDefaultCmd = true cmd.Version = serve.plugin.Version() return cmd diff --git a/serve/plugin_test.go b/serve/plugin_test.go index 9aeb864a4c..16f719abc8 100644 --- a/serve/plugin_test.go +++ b/serve/plugin_test.go @@ -5,16 +5,13 @@ import ( "context" "encoding/json" "io" - "net" "sync" "testing" - "time" "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/ipc" - pb "github.com/cloudquery/plugin-pb-go/pb/plugin/v0" + pb "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" "github.com/cloudquery/plugin-sdk/v4/plugin" - "github.com/cloudquery/plugin-sdk/v4/plugins/source" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/rs/zerolog" "google.golang.org/grpc" @@ -65,23 +62,13 @@ func newTestExecutionClient(context.Context, zerolog.Logger, pb.Spec) (plugin.Cl return &testExecutionClient{}, nil } -func bufSourceDialer(context.Context, string) (net.Conn, error) { - testPluginListenerLock.Lock() - defer testPluginListenerLock.Unlock() - return testPluginListener.Dial() -} - func TestSourceSuccess(t *testing.T) { - plugin := plugin.NewPlugin( + p := plugin.NewPlugin( "testPlugin", "v1.0.0", newTestExecutionClient, plugin.WithStaticTables([]*schema.Table{testTable("test_table"), testTable("test_table2")})) - - cmd := newCmdPluginRoot(&pluginServe{ - plugin: plugin, - }) - cmd.SetArgs([]string{"serve", "--network", "test"}) + srv := Plugin(p, WithArgs("serve"), WithTestListener()) ctx := context.Background() ctx, cancel := context.WithCancel(ctx) var wg sync.WaitGroup @@ -89,25 +76,15 @@ func TestSourceSuccess(t *testing.T) { var serverErr error go func() { defer wg.Done() - serverErr = cmd.ExecuteContext(ctx) + serverErr = srv.Serve(ctx) }() defer func() { cancel() wg.Wait() }() - for { - testPluginListenerLock.Lock() - if testPluginListener != nil { - testPluginListenerLock.Unlock() - break - } - testPluginListenerLock.Unlock() - t.Log("waiting for grpc server to start") - time.Sleep(time.Millisecond * 200) - } // https://stackoverflow.com/questions/42102496/testing-a-grpc-service - conn, err := grpc.DialContext(ctx, "bufnet", grpc.WithContextDialer(bufSourceDialer), grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithBlock()) + conn, err := grpc.DialContext(ctx, "bufnet", grpc.WithContextDialer(srv.bufPluginDialer), grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithBlock()) if err != nil { t.Fatalf("Failed to dial bufnet: %v", err) } @@ -216,7 +193,7 @@ func TestSourceSuccess(t *testing.T) { if err != nil { t.Fatal(err) } - var stats source.Metrics + var stats plugin.Metrics if err := json.Unmarshal(getMetricsRes.Metrics, &stats); err != nil { t.Fatal(err) } From b8ab1a1017e00083cae7d3067db49f1e8adf42d6 Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Wed, 31 May 2023 15:58:23 +0300 Subject: [PATCH 005/125] more wip --- ..._test.go.backup => destination_v0_test.go} | 34 +++---------------- 1 file changed, 5 insertions(+), 29 deletions(-) rename serve/{destination_v0_test.go.backup => destination_v0_test.go} (82%) diff --git a/serve/destination_v0_test.go.backup b/serve/destination_v0_test.go similarity index 82% rename from serve/destination_v0_test.go.backup rename to serve/destination_v0_test.go index e59bcbfd88..6a206e53ad 100644 --- a/serve/destination_v0_test.go.backup +++ b/serve/destination_v0_test.go @@ -3,7 +3,6 @@ package serve import ( "context" "encoding/json" - "net" "sync" "testing" "time" @@ -17,7 +16,6 @@ import ( schemav2 "github.com/cloudquery/plugin-sdk/v2/schema" "github.com/cloudquery/plugin-sdk/v2/testdata" "github.com/cloudquery/plugin-sdk/v4/internal/deprecated" - "github.com/cloudquery/plugin-sdk/v4/internal/memdb" serversDestination "github.com/cloudquery/plugin-sdk/v4/internal/servers/destination/v0" "github.com/cloudquery/plugin-sdk/v4/plugin" "google.golang.org/grpc" @@ -25,19 +23,9 @@ import ( "google.golang.org/protobuf/types/known/timestamppb" ) -func bufDestinationDialer(context.Context, string) (net.Conn, error) { - testPluginListenerLock.Lock() - defer testPluginListenerLock.Unlock() - return testPluginListener.Dial() -} - func TestDestination(t *testing.T) { - plugin := plugin.NewPlugin("testDestinationPlugin", "development", memdb.NewClient) - s := &pluginServe{ - plugin: plugin, - } - cmd := newCmdPluginRoot(s) - cmd.SetArgs([]string{"serve", "--network", "test"}) + p := plugin.NewPlugin("testDestinationPlugin", "development", plugin.NewMemDBClient) + srv := Plugin(p, WithArgs("serve"), WithDestinationV0V1Server(), WithTestListener()) ctx := context.Background() ctx, cancel := context.WithCancel(ctx) var wg sync.WaitGroup @@ -45,27 +33,15 @@ func TestDestination(t *testing.T) { var serverErr error go func() { defer wg.Done() - serverErr = cmd.ExecuteContext(ctx) + serverErr = srv.Serve(ctx) }() defer func() { cancel() wg.Wait() }() - // wait for the server to start - for { - testPluginListenerLock.Lock() - if testPluginListener != nil { - testPluginListenerLock.Unlock() - break - } - testPluginListenerLock.Unlock() - t.Log("waiting for grpc server to start") - time.Sleep(time.Millisecond * 200) - } - // https://stackoverflow.com/questions/42102496/testing-a-grpc-service - conn, err := grpc.DialContext(ctx, "bufnet", grpc.WithContextDialer(bufDestinationDialer), grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithBlock()) + conn, err := grpc.DialContext(ctx, "bufnet", grpc.WithContextDialer(srv.bufPluginDialer), grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithBlock()) if err != nil { t.Fatalf("Failed to dial bufnet: %v", err) } @@ -154,7 +130,7 @@ func TestDestination(t *testing.T) { // serversDestination table := serversDestination.TableV2ToV3(tableV2) readCh := make(chan arrow.Record, 1) - if err := plugin.Read(ctx, table, sourceName, readCh); err != nil { + if err := p.Read(ctx, table, sourceName, readCh); err != nil { t.Fatal(err) } close(readCh) From f5fa791122ef0cb95f4f833717c0e603736c8e2d Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Fri, 2 Jun 2023 00:27:41 +0300 Subject: [PATCH 006/125] more wip --- helpers/integers.go | 19 -- helpers/pointers.go | 20 -- helpers/pointers_test.go | 50 ---- helpers/strings.go | 39 --- helpers/strings_test.go | 20 -- internal/backends/local/local.go | 2 +- .../servers/destination/v0/destinations.go | 42 ++- internal/servers/destination/v0/specv3tov1.go | 77 ------ .../servers/destination/v1/destinations.go | 34 ++- internal/servers/destination/v1/specv3tov1.go | 77 ------ internal/servers/plugin/v3/plugin.go | 68 +++-- plugin/managed_writer.go | 27 +- plugin/memdb.go | 21 +- plugin/memdb_test.go | 31 --- plugin/options.go | 66 ++++- plugin/plugin.go | 255 ++---------------- plugin/plugin_managed_source_test.go | 41 +-- plugin/plugin_reader.go | 110 ++++++++ plugin/plugin_round_robin_test.go | 78 ------ plugin/plugin_test.go | 97 +++++++ plugin/plugin_writer.go | 69 +++++ plugin/scheduler_dfs.go | 7 +- plugin/scheduler_round_robin.go | 5 +- plugin/testing_overwrite_deletestale.go | 20 +- plugin/testing_sync.go | 45 ++-- plugin/testing_write.go | 56 ++-- plugin/testing_write_append.go | 17 +- plugin/testing_write_migrate.go | 53 ++-- plugin/testing_write_overwrite.go | 15 +- serve/plugin.go | 84 +++--- serve/plugin_test.go | 16 +- 31 files changed, 652 insertions(+), 909 deletions(-) delete mode 100644 helpers/integers.go delete mode 100644 helpers/pointers.go delete mode 100644 helpers/pointers_test.go delete mode 100644 helpers/strings.go delete mode 100644 helpers/strings_test.go delete mode 100644 internal/servers/destination/v0/specv3tov1.go delete mode 100644 internal/servers/destination/v1/specv3tov1.go create mode 100644 plugin/plugin_reader.go delete mode 100644 plugin/plugin_round_robin_test.go create mode 100644 plugin/plugin_test.go create mode 100644 plugin/plugin_writer.go diff --git a/helpers/integers.go b/helpers/integers.go deleted file mode 100644 index a539552377..0000000000 --- a/helpers/integers.go +++ /dev/null @@ -1,19 +0,0 @@ -package helpers - -import "math" - -// Uint64ToInt64 if value is greater than math.MaxInt64 return math.MaxInt64 -// otherwise returns original value cast to int64 -func Uint64ToInt64(i uint64) int64 { - if i > math.MaxInt64 { - return math.MaxInt64 - } - return int64(i) -} - -func Uint64ToInt(i uint64) int { - if i > math.MaxInt { - return math.MaxInt - } - return int(i) -} diff --git a/helpers/pointers.go b/helpers/pointers.go deleted file mode 100644 index 2f5a008535..0000000000 --- a/helpers/pointers.go +++ /dev/null @@ -1,20 +0,0 @@ -package helpers - -import "reflect" - -// ToPointer takes an any object and will return a pointer to this object -// if the object is not already a pointer. Otherwise, it will return the original value. -// It is safe to typecast the return-value of GetPointer into a pointer of the right type, -// except in very special cases (such as passing in nil without an explicit type) -func ToPointer(v any) any { - val := reflect.ValueOf(v) - if val.Kind() == reflect.Ptr { - return v - } - if !val.IsValid() { - return v - } - p := reflect.New(val.Type()) - p.Elem().Set(val) - return p.Interface() -} diff --git a/helpers/pointers_test.go b/helpers/pointers_test.go deleted file mode 100644 index 2ae81ed7a3..0000000000 --- a/helpers/pointers_test.go +++ /dev/null @@ -1,50 +0,0 @@ -package helpers - -import ( - "testing" -) - -type testStruct struct { - test string -} - -func TestToPointer(t *testing.T) { - // passing string should return pointer to string - give := "test" - got := ToPointer(give) - if _, ok := got.(*string); !ok { - t.Errorf("ToPointer(%q) returned %q, expected type *string", give, got) - } - - // passing struct by value should return pointer to (copy of the) struct - giveObj := testStruct{ - test: "value", - } - gotStruct := ToPointer(giveObj) - if _, ok := gotStruct.(*testStruct); !ok { - t.Errorf("ToPointer(%q) returned %q, expected type *testStruct", giveObj, gotStruct) - } - - // passing pointer should return the original pointer - ptr := &giveObj - gotPointer := ToPointer(ptr) - if gotPointer != ptr { - t.Errorf("ToPointer(%q) returned %q, expected %q", ptr, gotPointer, ptr) - } - - // passing nil should return nil back without panicking - gotNil := ToPointer(nil) - if gotNil != nil { - t.Errorf("ToPointer(%v) returned %q, expected nil", nil, gotNil) - } - - // passing number should return pointer to number - giveNumber := int64(0) - gotNumber := ToPointer(giveNumber) - if v, ok := gotNumber.(*int64); !ok { - t.Errorf("ToPointer(%q) returned %q, expected type *int64", giveNumber, gotNumber) - if *v != 0 { - t.Errorf("ToPointer(%q) returned %q, expected 0", giveNumber, gotNumber) - } - } -} diff --git a/helpers/strings.go b/helpers/strings.go deleted file mode 100644 index e522a3c5ea..0000000000 --- a/helpers/strings.go +++ /dev/null @@ -1,39 +0,0 @@ -package helpers - -import ( - "fmt" - "sort" - "strings" - - "github.com/spf13/cast" -) - -func FormatSlice(a []string) string { - // sort slice for consistency - sort.Strings(a) - q := make([]string, len(a)) - for i, s := range a { - q[i] = fmt.Sprintf("%q", s) - } - return fmt.Sprintf("[\n\t%s\n]", strings.Join(q, ",\n\t")) -} - -func HasDuplicates(resources []string) bool { - dups := make(map[string]bool, len(resources)) - for _, r := range resources { - if _, ok := dups[r]; ok { - return true - } - dups[r] = true - } - return false -} - -func ToStringSliceE(i any) ([]string, error) { - switch v := i.(type) { - case *[]string: - return cast.ToStringSliceE(*v) - default: - return cast.ToStringSliceE(i) - } -} diff --git a/helpers/strings_test.go b/helpers/strings_test.go deleted file mode 100644 index 991492df8e..0000000000 --- a/helpers/strings_test.go +++ /dev/null @@ -1,20 +0,0 @@ -package helpers - -import ( - "testing" - - "github.com/stretchr/testify/assert" -) - -func TestHasDuplicates(t *testing.T) { - assert.False(t, HasDuplicates([]string{"A", "b", "c"})) - assert.False(t, HasDuplicates([]string{"A", "a", "c"})) - assert.True(t, HasDuplicates([]string{"a", "a", "c"})) - assert.True(t, HasDuplicates([]string{"a", "a", "c", "c", "f"})) -} - -func TestToStingSliceE(t *testing.T) { - arr := &[]string{"a", "b", "c"} - newArr, _ := ToStringSliceE(arr) - assert.Equal(t, newArr, []string{"a", "b", "c"}) -} diff --git a/internal/backends/local/local.go b/internal/backends/local/local.go index 0593d8b0b0..f593260dde 100644 --- a/internal/backends/local/local.go +++ b/internal/backends/local/local.go @@ -10,7 +10,7 @@ import ( "strings" "sync" - "github.com/cloudquery/plugin-pb-go/specs" + "github.com/cloudquery/plugin-pb-go/specs/v0" ) type Local struct { diff --git a/internal/servers/destination/v0/destinations.go b/internal/servers/destination/v0/destinations.go index c315bd0652..93fe380574 100644 --- a/internal/servers/destination/v0/destinations.go +++ b/internal/servers/destination/v0/destinations.go @@ -10,7 +10,7 @@ import ( "github.com/apache/arrow/go/v13/arrow/memory" pbBase "github.com/cloudquery/plugin-pb-go/pb/base/v0" pb "github.com/cloudquery/plugin-pb-go/pb/destination/v0" - "github.com/cloudquery/plugin-pb-go/specs" + "github.com/cloudquery/plugin-pb-go/specs/v0" schemav2 "github.com/cloudquery/plugin-sdk/v2/schema" "github.com/cloudquery/plugin-sdk/v4/plugin" "github.com/cloudquery/plugin-sdk/v4/schema" @@ -22,9 +22,11 @@ import ( type Server struct { pb.UnimplementedDestinationServer - Plugin *plugin.Plugin - Logger zerolog.Logger - spec specs.Destination + Plugin *plugin.Plugin + Logger zerolog.Logger + spec specs.Destination + writeMode plugin.WriteMode + migrateMode plugin.MigrateMode } func (*Server) GetProtocolVersion(context.Context, *pbBase.GetProtocolVersion_Request) (*pbBase.GetProtocolVersion_Response, error) { @@ -39,8 +41,21 @@ func (s *Server) Configure(ctx context.Context, req *pbBase.Configure_Request) ( return nil, status.Errorf(codes.InvalidArgument, "failed to unmarshal spec: %v", err) } s.spec = spec - specV3 := SpecV1ToV3(spec) - return &pbBase.Configure_Response{}, s.Plugin.Init(ctx, specV3) + switch s.spec.WriteMode { + case specs.WriteModeAppend: + s.writeMode = plugin.WriteModeAppend + case specs.WriteModeOverwrite: + s.writeMode = plugin.WriteModeOverwrite + case specs.WriteModeOverwriteDeleteStale: + s.writeMode = plugin.WriteModeOverwriteDeleteStale + } + switch s.spec.MigrateMode { + case specs.MigrateModeSafe: + s.migrateMode = plugin.MigrateModeSafe + case specs.MigrateModeForced: + s.migrateMode = plugin.MigrateModeForced + } + return &pbBase.Configure_Response{}, s.Plugin.Init(ctx, nil) } func (s *Server) GetName(context.Context, *pbBase.GetName_Request) (*pbBase.GetName_Response, error) { @@ -64,7 +79,16 @@ func (s *Server) Migrate(ctx context.Context, req *pb.Migrate_Request) (*pb.Migr SetDestinationManagedCqColumns(tables) s.setPKsForTables(tables) - return &pb.Migrate_Response{}, s.Plugin.Migrate(ctx, tables) + var migrateMode plugin.MigrateMode + switch s.spec.MigrateMode { + case specs.MigrateModeSafe: + migrateMode = plugin.MigrateModeSafe + case specs.MigrateModeForced: + migrateMode = plugin.MigrateModeForced + default: + return nil, status.Errorf(codes.InvalidArgument, "invalid migrate mode: %v", s.spec.MigrateMode) + } + return &pb.Migrate_Response{}, s.Plugin.Migrate(ctx, tables, migrateMode) } func (*Server) Write(pb.Destination_WriteServer) error { @@ -103,9 +127,9 @@ func (s *Server) Write2(msg pb.Destination_Write2Server) error { SetDestinationManagedCqColumns(tables) s.setPKsForTables(tables) eg, ctx := errgroup.WithContext(msg.Context()) - sourceSpecV3 := SourceSpecV1ToV3(sourceSpec) + sourceName := r.Source eg.Go(func() error { - return s.Plugin.Write(ctx, sourceSpecV3, tables, syncTime, resources) + return s.Plugin.Write(ctx, sourceName, tables, syncTime, s.writeMode, resources) }) sourceColumn := &schemav2.Text{} _ = sourceColumn.Set(sourceSpec.Name) diff --git a/internal/servers/destination/v0/specv3tov1.go b/internal/servers/destination/v0/specv3tov1.go deleted file mode 100644 index 1e7146e507..0000000000 --- a/internal/servers/destination/v0/specv3tov1.go +++ /dev/null @@ -1,77 +0,0 @@ -package destination - -import ( - pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" - "github.com/cloudquery/plugin-pb-go/specs" -) - -func SourceSpecV1ToV3(spec specs.Source) pbPlugin.Spec { - newSpec := pbPlugin.Spec{ - Name: spec.Name, - Version: spec.Version, - Path: spec.Path, - SyncSpec: &pbPlugin.SyncSpec{ - Tables: spec.Tables, - SkipTables: spec.SkipTables, - Destinations: spec.Destinations, - Concurrency: uint64(spec.Concurrency), - DetrministicCqId: spec.DeterministicCQID, - }, - } - switch spec.Scheduler { - case specs.SchedulerDFS: - newSpec.SyncSpec.Scheduler = pbPlugin.SyncSpec_SCHEDULER_DFS - case specs.SchedulerRoundRobin: - newSpec.SyncSpec.Scheduler = pbPlugin.SyncSpec_SCHEDULER_ROUND_ROBIN - default: - panic("invalid scheduler " + spec.Scheduler.String()) - } - return newSpec -} - -func SpecV1ToV3(spec specs.Destination) pbPlugin.Spec { - newSpec := pbPlugin.Spec{ - Name: spec.Name, - Version: spec.Version, - Path: spec.Path, - WriteSpec: &pbPlugin.WriteSpec{ - BatchSize: uint64(spec.BatchSize), - BatchSizeBytes: uint64(spec.BatchSizeBytes), - }, - } - switch spec.Registry { - case specs.RegistryGithub: - newSpec.Registry = pbPlugin.Spec_REGISTRY_GITHUB - case specs.RegistryGrpc: - newSpec.Registry = pbPlugin.Spec_REGISTRY_GRPC - case specs.RegistryLocal: - newSpec.Registry = pbPlugin.Spec_REGISTRY_LOCAL - default: - panic("invalid registry " + spec.Registry.String()) - } - switch spec.WriteMode { - case specs.WriteModeAppend: - newSpec.WriteSpec.WriteMode = pbPlugin.WRITE_MODE_WRITE_MODE_APPEND - case specs.WriteModeOverwrite: - newSpec.WriteSpec.WriteMode = pbPlugin.WRITE_MODE_WRITE_MODE_OVERWRITE - case specs.WriteModeOverwriteDeleteStale: - newSpec.WriteSpec.WriteMode = pbPlugin.WRITE_MODE_WRITE_MODE_OVERWRITE_DELETE_STALE - default: - panic("invalid write mode " + spec.WriteMode.String()) - } - switch spec.PKMode { - case specs.PKModeDefaultKeys: - newSpec.WriteSpec.PkMode = pbPlugin.WriteSpec_DEFAULT - case specs.PKModeCQID: - newSpec.WriteSpec.PkMode = pbPlugin.WriteSpec_CQ_ID_ONLY - } - switch spec.MigrateMode { - case specs.MigrateModeSafe: - newSpec.WriteSpec.MigrateMode = pbPlugin.WriteSpec_SAFE - case specs.MigrateModeForced: - newSpec.WriteSpec.MigrateMode = pbPlugin.WriteSpec_FORCE - default: - panic("invalid migrate mode " + spec.MigrateMode.String()) - } - return newSpec -} diff --git a/internal/servers/destination/v1/destinations.go b/internal/servers/destination/v1/destinations.go index 4748c3c947..c65c6b4256 100644 --- a/internal/servers/destination/v1/destinations.go +++ b/internal/servers/destination/v1/destinations.go @@ -10,7 +10,7 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/ipc" pb "github.com/cloudquery/plugin-pb-go/pb/destination/v1" - "github.com/cloudquery/plugin-pb-go/specs" + "github.com/cloudquery/plugin-pb-go/specs/v0" "github.com/cloudquery/plugin-sdk/v4/plugin" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/rs/zerolog" @@ -21,9 +21,11 @@ import ( type Server struct { pb.UnimplementedDestinationServer - Plugin *plugin.Plugin - Logger zerolog.Logger - spec specs.Destination + Plugin *plugin.Plugin + Logger zerolog.Logger + spec specs.Destination + writeMode plugin.WriteMode + migrateMode plugin.MigrateMode } func (s *Server) Configure(ctx context.Context, req *pb.Configure_Request) (*pb.Configure_Response, error) { @@ -32,8 +34,21 @@ func (s *Server) Configure(ctx context.Context, req *pb.Configure_Request) (*pb. return nil, status.Errorf(codes.InvalidArgument, "failed to unmarshal spec: %v", err) } s.spec = spec - specV3 := SpecV1ToV3(spec) - return &pb.Configure_Response{}, s.Plugin.Init(ctx, specV3) + switch s.spec.WriteMode { + case specs.WriteModeAppend: + s.writeMode = plugin.WriteModeAppend + case specs.WriteModeOverwrite: + s.writeMode = plugin.WriteModeOverwrite + case specs.WriteModeOverwriteDeleteStale: + s.writeMode = plugin.WriteModeOverwriteDeleteStale + } + switch s.spec.MigrateMode { + case specs.MigrateModeSafe: + s.migrateMode = plugin.MigrateModeSafe + case specs.MigrateModeForced: + s.migrateMode = plugin.MigrateModeForced + } + return &pb.Configure_Response{}, s.Plugin.Init(ctx, s.spec.Spec) } func (s *Server) GetName(context.Context, *pb.GetName_Request) (*pb.GetName_Response, error) { @@ -59,7 +74,7 @@ func (s *Server) Migrate(ctx context.Context, req *pb.Migrate_Request) (*pb.Migr } s.setPKsForTables(tables) - return &pb.Migrate_Response{}, s.Plugin.Migrate(ctx, tables) + return &pb.Migrate_Response{}, s.Plugin.Migrate(ctx, tables, s.migrateMode) } // Note the order of operations in this method is important! @@ -97,9 +112,10 @@ func (s *Server) Write(msg pb.Destination_WriteServer) error { syncTime := r.Timestamp.AsTime() s.setPKsForTables(tables) eg, ctx := errgroup.WithContext(msg.Context()) - sourceSpecV3 := SourceSpecV1ToV3(sourceSpec) + sourceName := r.Source + eg.Go(func() error { - return s.Plugin.Write(ctx, sourceSpecV3, tables, syncTime, resources) + return s.Plugin.Write(ctx, sourceName, tables, syncTime, s.writeMode, resources) }) for { diff --git a/internal/servers/destination/v1/specv3tov1.go b/internal/servers/destination/v1/specv3tov1.go deleted file mode 100644 index 1e7146e507..0000000000 --- a/internal/servers/destination/v1/specv3tov1.go +++ /dev/null @@ -1,77 +0,0 @@ -package destination - -import ( - pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" - "github.com/cloudquery/plugin-pb-go/specs" -) - -func SourceSpecV1ToV3(spec specs.Source) pbPlugin.Spec { - newSpec := pbPlugin.Spec{ - Name: spec.Name, - Version: spec.Version, - Path: spec.Path, - SyncSpec: &pbPlugin.SyncSpec{ - Tables: spec.Tables, - SkipTables: spec.SkipTables, - Destinations: spec.Destinations, - Concurrency: uint64(spec.Concurrency), - DetrministicCqId: spec.DeterministicCQID, - }, - } - switch spec.Scheduler { - case specs.SchedulerDFS: - newSpec.SyncSpec.Scheduler = pbPlugin.SyncSpec_SCHEDULER_DFS - case specs.SchedulerRoundRobin: - newSpec.SyncSpec.Scheduler = pbPlugin.SyncSpec_SCHEDULER_ROUND_ROBIN - default: - panic("invalid scheduler " + spec.Scheduler.String()) - } - return newSpec -} - -func SpecV1ToV3(spec specs.Destination) pbPlugin.Spec { - newSpec := pbPlugin.Spec{ - Name: spec.Name, - Version: spec.Version, - Path: spec.Path, - WriteSpec: &pbPlugin.WriteSpec{ - BatchSize: uint64(spec.BatchSize), - BatchSizeBytes: uint64(spec.BatchSizeBytes), - }, - } - switch spec.Registry { - case specs.RegistryGithub: - newSpec.Registry = pbPlugin.Spec_REGISTRY_GITHUB - case specs.RegistryGrpc: - newSpec.Registry = pbPlugin.Spec_REGISTRY_GRPC - case specs.RegistryLocal: - newSpec.Registry = pbPlugin.Spec_REGISTRY_LOCAL - default: - panic("invalid registry " + spec.Registry.String()) - } - switch spec.WriteMode { - case specs.WriteModeAppend: - newSpec.WriteSpec.WriteMode = pbPlugin.WRITE_MODE_WRITE_MODE_APPEND - case specs.WriteModeOverwrite: - newSpec.WriteSpec.WriteMode = pbPlugin.WRITE_MODE_WRITE_MODE_OVERWRITE - case specs.WriteModeOverwriteDeleteStale: - newSpec.WriteSpec.WriteMode = pbPlugin.WRITE_MODE_WRITE_MODE_OVERWRITE_DELETE_STALE - default: - panic("invalid write mode " + spec.WriteMode.String()) - } - switch spec.PKMode { - case specs.PKModeDefaultKeys: - newSpec.WriteSpec.PkMode = pbPlugin.WriteSpec_DEFAULT - case specs.PKModeCQID: - newSpec.WriteSpec.PkMode = pbPlugin.WriteSpec_CQ_ID_ONLY - } - switch spec.MigrateMode { - case specs.MigrateModeSafe: - newSpec.WriteSpec.MigrateMode = pbPlugin.WriteSpec_SAFE - case specs.MigrateModeForced: - newSpec.WriteSpec.MigrateMode = pbPlugin.WriteSpec_FORCE - default: - panic("invalid migrate mode " + spec.MigrateMode.String()) - } - return newSpec -} diff --git a/internal/servers/plugin/v3/plugin.go b/internal/servers/plugin/v3/plugin.go index 29a58f1fce..bbeb968859 100644 --- a/internal/servers/plugin/v3/plugin.go +++ b/internal/servers/plugin/v3/plugin.go @@ -7,7 +7,6 @@ import ( "errors" "fmt" "io" - "io/ioutil" "os" "path/filepath" @@ -30,7 +29,6 @@ type Server struct { pb.UnimplementedPluginServer Plugin *plugin.Plugin Logger zerolog.Logger - spec pb.Spec } func (s *Server) GetStaticTables(context.Context, *pb.GetStaticTables_Request) (*pb.GetStaticTables_Response, error) { @@ -71,10 +69,9 @@ func (s *Server) GetVersion(context.Context, *pb.GetVersion_Request) (*pb.GetVer } func (s *Server) Init(ctx context.Context, req *pb.Init_Request) (*pb.Init_Response, error) { - if err := s.Plugin.Init(ctx, *req.Spec); err != nil { + if err := s.Plugin.Init(ctx, req.Spec); err != nil { return nil, status.Errorf(codes.Internal, "failed to init plugin: %v", err) } - s.spec = *req.Spec return &pb.Init_Response{}, nil } @@ -83,20 +80,27 @@ func (s *Server) Sync(req *pb.Sync_Request, stream pb.Plugin_SyncServer) error { var syncErr error ctx := stream.Context() - if req.SyncSpec == nil { - req.SyncSpec = &pb.SyncSpec{} + syncOptions := plugin.SyncOptions{ + Tables: req.Tables, + SkipTables: req.SkipTables, + Concurrency: req.Concurrency, + Scheduler: plugin.SchedulerDFS, } + if req.Scheduler == pb.SCHEDULER_SCHEDULER_ROUND_ROBIN { + syncOptions.Scheduler = plugin.SchedulerRoundRobin + } + + sourceName := req.SourceName go func() { defer close(records) - err := s.Plugin.Sync(ctx, req.SyncTime.AsTime(), *req.SyncSpec, records) + err := s.Plugin.Sync(ctx, sourceName, req.SyncTime.AsTime(), syncOptions, records) if err != nil { syncErr = fmt.Errorf("failed to sync records: %w", err) } }() for rec := range records { - var buf bytes.Buffer w := ipc.NewWriter(&buf, ipc.WithSchema(rec.Schema())) if err := w.Write(rec); err != nil { @@ -158,8 +162,18 @@ func (s *Server) Migrate(ctx context.Context, req *pb.Migrate_Request) (*pb.Migr if err != nil { return nil, status.Errorf(codes.InvalidArgument, "failed to create tables: %v", err) } - s.setPKsForTables(tables) - return &pb.Migrate_Response{}, s.Plugin.Migrate(ctx, tables) + if req.PkMode == pb.PK_MODE_CQ_ID_ONLY { + setCQIDAsPrimaryKeysForTables(tables) + } + migrateMode := plugin.MigrateModeSafe + switch req.MigrateMode { + case pb.MIGRATE_MODE_SAFE: + migrateMode = plugin.MigrateModeSafe + case pb.MIGRATE_MODE_FORCE: + migrateMode = plugin.MigrateModeForced + } + // switch req. + return &pb.Migrate_Response{}, s.Plugin.Migrate(ctx, tables, migrateMode) } func (s *Server) Write(msg pb.Plugin_WriteServer) error { @@ -181,12 +195,23 @@ func (s *Server) Write(msg pb.Plugin_WriteServer) error { if err != nil { return status.Errorf(codes.InvalidArgument, "failed to create tables: %v", err) } - s.setPKsForTables(tables) - sourceSpec := *r.SourceSpec - syncTime := r.Timestamp.AsTime() + if r.PkMode == pb.PK_MODE_CQ_ID_ONLY { + setCQIDAsPrimaryKeysForTables(tables) + } + sourceName := r.SourceName + syncTime := r.SyncTime.AsTime() + writeMode := plugin.WriteModeOverwrite + switch r.WriteMode { + case pb.WRITE_MODE_WRITE_MODE_APPEND: + writeMode = plugin.WriteModeAppend + case pb.WRITE_MODE_WRITE_MODE_OVERWRITE: + writeMode = plugin.WriteModeOverwrite + case pb.WRITE_MODE_WRITE_MODE_OVERWRITE_DELETE_STALE: + writeMode = plugin.WriteModeOverwriteDeleteStale + } eg, ctx := errgroup.WithContext(msg.Context()) eg.Go(func() error { - return s.Plugin.Write(ctx, sourceSpec, tables, syncTime, resources) + return s.Plugin.Write(ctx, sourceName, tables, syncTime, writeMode, resources) }) for { @@ -233,15 +258,18 @@ func (s *Server) Write(msg pb.Plugin_WriteServer) error { } func (s *Server) GenDocs(req *pb.GenDocs_Request, srv pb.Plugin_GenDocsServer) error { - tmpDir := os.TempDir() + tmpDir, err := os.MkdirTemp("", "cloudquery-docs") + if err != nil { + return fmt.Errorf("failed to create tmp dir: %w", err) + } defer os.RemoveAll(tmpDir) - err := s.Plugin.GeneratePluginDocs(tmpDir, req.Format) + err = s.Plugin.GeneratePluginDocs(tmpDir, req.Format) if err != nil { return fmt.Errorf("failed to generate docs: %w", err) } // list files in tmpDir - files, err := ioutil.ReadDir(tmpDir) + files, err := os.ReadDir(tmpDir) if err != nil { return fmt.Errorf("failed to read tmp dir: %w", err) } @@ -281,12 +309,6 @@ func checkMessageSize(msg proto.Message, record arrow.Record) error { return nil } -func (s *Server) setPKsForTables(tables schema.Tables) { - if s.spec.WriteSpec.PkMode == pb.WriteSpec_CQ_ID_ONLY { - setCQIDAsPrimaryKeysForTables(tables) - } -} - func setCQIDAsPrimaryKeysForTables(tables schema.Tables) { for _, table := range tables { for i, col := range table.Columns { diff --git a/plugin/managed_writer.go b/plugin/managed_writer.go index 3ed8d26903..6af7fd0004 100644 --- a/plugin/managed_writer.go +++ b/plugin/managed_writer.go @@ -8,7 +8,6 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/util" - "github.com/cloudquery/plugin-pb-go/specs" "github.com/cloudquery/plugin-sdk/v4/internal/pk" "github.com/cloudquery/plugin-sdk/v4/schema" ) @@ -20,7 +19,7 @@ type worker struct { flush chan chan bool } -func (p *Plugin) worker(ctx context.Context, metrics *Metrics, table *schema.Table, ch <-chan arrow.Record, flush <-chan chan bool) { +func (p *Plugin) worker(ctx context.Context, metrics *Metrics, table *schema.Table, writeMode WriteMode, ch <-chan arrow.Record, flush <-chan chan bool) { sizeBytes := int64(0) resources := make([]arrow.Record, 0) for { @@ -28,27 +27,27 @@ func (p *Plugin) worker(ctx context.Context, metrics *Metrics, table *schema.Tab case r, ok := <-ch: if !ok { if len(resources) > 0 { - p.flush(ctx, metrics, table, resources) + p.flush(ctx, metrics, table, writeMode, resources) } return } - if uint64(len(resources)) == p.spec.WriteSpec.BatchSize || sizeBytes+util.TotalRecordSize(r) > int64(p.spec.WriteSpec.BatchSizeBytes) { - p.flush(ctx, metrics, table, resources) - resources = resources[:0] // allows for mem reuse + if uint64(len(resources)) == 1000 || sizeBytes+util.TotalRecordSize(r) > int64(1000) { + p.flush(ctx, metrics, table, writeMode, resources) + resources = make([]arrow.Record, 0) sizeBytes = 0 } resources = append(resources, r) sizeBytes += util.TotalRecordSize(r) case <-time.After(p.batchTimeout): if len(resources) > 0 { - p.flush(ctx, metrics, table, resources) - resources = resources[:0] // allows for mem reuse + p.flush(ctx, metrics, table, writeMode, resources) + resources = make([]arrow.Record, 0) sizeBytes = 0 } case done := <-flush: if len(resources) > 0 { - p.flush(ctx, metrics, table, resources) - resources = resources[:0] // allows for mem reuse + p.flush(ctx, metrics, table, writeMode, resources) + resources = make([]arrow.Record, 0) sizeBytes = 0 } done <- true @@ -59,11 +58,11 @@ func (p *Plugin) worker(ctx context.Context, metrics *Metrics, table *schema.Tab } } -func (p *Plugin) flush(ctx context.Context, metrics *Metrics, table *schema.Table, resources []arrow.Record) { +func (p *Plugin) flush(ctx context.Context, metrics *Metrics, table *schema.Table, writeMode WriteMode, resources []arrow.Record) { resources = p.removeDuplicatesByPK(table, resources) start := time.Now() batchSize := len(resources) - if err := p.client.WriteTableBatch(ctx, table, resources); err != nil { + if err := p.client.WriteTableBatch(ctx, table, writeMode, resources); err != nil { p.logger.Err(err).Str("table", table.Name).Int("len", batchSize).Dur("duration", time.Since(start)).Msg("failed to write batch") // we don't return an error as we need to continue until channel is closed otherwise there will be a deadlock // atomic.AddUint64(&metrics.Errors, uint64(batchSize)) @@ -100,7 +99,7 @@ func (*Plugin) removeDuplicatesByPK(table *schema.Table, resources []arrow.Recor return res } -func (p *Plugin) writeManagedTableBatch(ctx context.Context, _ specs.Source, tables schema.Tables, _ time.Time, res <-chan arrow.Record) error { +func (p *Plugin) writeManagedTableBatch(ctx context.Context, tables schema.Tables, writeMode WriteMode, res <-chan arrow.Record) error { workers := make(map[string]*worker, len(tables)) metrics := &Metrics{} @@ -120,7 +119,7 @@ func (p *Plugin) writeManagedTableBatch(ctx context.Context, _ specs.Source, tab wg.Add(1) go func() { defer wg.Done() - p.worker(ctx, metrics, table, ch, flush) + p.worker(ctx, metrics, table, writeMode, ch, flush) }() } else { p.workers[table.Name].count++ diff --git a/plugin/memdb.go b/plugin/memdb.go index e13da4a9c0..4a99073935 100644 --- a/plugin/memdb.go +++ b/plugin/memdb.go @@ -8,14 +8,12 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/array" - pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/rs/zerolog" ) // client is mostly used for testing the destination plugin. type client struct { - spec pbPlugin.Spec memoryDB map[string][]arrow.Record tables map[string]*schema.Table memoryDBLock sync.RWMutex @@ -45,20 +43,19 @@ func GetNewClient(options ...MemDBOption) NewClientFunc { for _, opt := range options { opt(c) } - return func(context.Context, zerolog.Logger, pbPlugin.Spec) (Client, error) { + return func(context.Context, zerolog.Logger, any) (Client, error) { return c, nil } } -func NewMemDBClient(_ context.Context, _ zerolog.Logger, spec pbPlugin.Spec) (Client, error) { +func NewMemDBClient(_ context.Context, _ zerolog.Logger, spec any) (Client, error) { return &client{ memoryDB: make(map[string][]arrow.Record), tables: make(map[string]*schema.Table), - spec: spec, }, nil } -func NewMemDBClientErrOnNew(context.Context, zerolog.Logger, pbPlugin.Spec) (Client, error) { +func NewMemDBClientErrOnNew(context.Context, zerolog.Logger, []byte) (Client, error) { return nil, fmt.Errorf("newTestDestinationMemDBClientErrOnNew") } @@ -87,7 +84,7 @@ func (c *client) ID() string { return "testDestinationMemDB" } -func (c *client) Sync(ctx context.Context, metrics *Metrics, res chan<- arrow.Record) error { +func (c *client) Sync(ctx context.Context, res chan<- arrow.Record) error { c.memoryDBLock.RLock() for tableName := range c.memoryDB { for _, row := range c.memoryDB[tableName] { @@ -98,7 +95,7 @@ func (c *client) Sync(ctx context.Context, metrics *Metrics, res chan<- arrow.Re return nil } -func (c *client) Migrate(_ context.Context, tables schema.Tables) error { +func (c *client) Migrate(_ context.Context, tables schema.Tables, migrateMode MigrateMode) error { for _, table := range tables { tableName := table.Name memTable := c.memoryDB[tableName] @@ -144,7 +141,7 @@ func (c *client) Read(_ context.Context, table *schema.Table, source string, res return nil } -func (c *client) Write(ctx context.Context, _ schema.Tables, resources <-chan arrow.Record) error { +func (c *client) Write(ctx context.Context, _ schema.Tables, writeMode WriteMode, resources <-chan arrow.Record) error { if c.errOnWrite { return fmt.Errorf("errOnWrite") } @@ -164,7 +161,7 @@ func (c *client) Write(ctx context.Context, _ schema.Tables, resources <-chan ar return fmt.Errorf("table name not found in schema metadata") } table := c.tables[tableName] - if c.spec.WriteSpec.WriteMode == pbPlugin.WRITE_MODE_WRITE_MODE_APPEND { + if writeMode == WriteModeAppend { c.memoryDB[tableName] = append(c.memoryDB[tableName], resource) } else { c.overwrite(table, resource) @@ -174,7 +171,7 @@ func (c *client) Write(ctx context.Context, _ schema.Tables, resources <-chan ar return nil } -func (c *client) WriteTableBatch(ctx context.Context, table *schema.Table, resources []arrow.Record) error { +func (c *client) WriteTableBatch(ctx context.Context, table *schema.Table, writeMode WriteMode, resources []arrow.Record) error { if c.errOnWrite { return fmt.Errorf("errOnWrite") } @@ -188,7 +185,7 @@ func (c *client) WriteTableBatch(ctx context.Context, table *schema.Table, resou tableName := table.Name for _, resource := range resources { c.memoryDBLock.Lock() - if c.spec.WriteSpec.WriteMode == pbPlugin.WRITE_MODE_WRITE_MODE_APPEND { + if writeMode == WriteModeAppend { c.memoryDB[tableName] = append(c.memoryDB[tableName], resource) } else { c.overwrite(table, resource) diff --git a/plugin/memdb_test.go b/plugin/memdb_test.go index 4cfa954f1f..d89a70ac0d 100644 --- a/plugin/memdb_test.go +++ b/plugin/memdb_test.go @@ -9,8 +9,6 @@ import ( pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/google/uuid" - "github.com/rs/zerolog" - "github.com/stretchr/testify/require" ) var migrateStrategyOverwrite = MigrateStrategy{ @@ -179,32 +177,3 @@ func TestOnWriteCtxCancelled(t *testing.T) { t.Fatal(err) } } - -func TestPluginInit(t *testing.T) { - const ( - batchSize = uint64(100) - batchSizeBytes = uint64(1000) - ) - - var ( - batchSizeObserved uint64 - batchSizeBytesObserved uint64 - ) - p := NewPlugin( - "test", - "development", - func(ctx context.Context, logger zerolog.Logger, s pbPlugin.Spec) (Client, error) { - batchSizeObserved = s.WriteSpec.BatchSize - batchSizeBytesObserved = s.WriteSpec.BatchSizeBytes - return NewMemDBClient(ctx, logger, s) - }, - WithDefaultBatchSize(int(batchSize)), - WithDefaultBatchSizeBytes(int(batchSizeBytes)), - ) - require.NoError(t, p.Init(context.TODO(), pbPlugin.Spec{ - WriteSpec: &pbPlugin.WriteSpec{}, - })) - - require.Equal(t, batchSize, batchSizeObserved) - require.Equal(t, batchSizeBytes, batchSizeBytesObserved) -} diff --git a/plugin/options.go b/plugin/options.go index 2a226724e0..fa2b3b53f2 100644 --- a/plugin/options.go +++ b/plugin/options.go @@ -1,12 +1,74 @@ package plugin import ( + "bytes" "context" "time" "github.com/cloudquery/plugin-sdk/v4/schema" ) +type MigrateMode int + +const ( + MigrateModeSafe MigrateMode = iota + MigrateModeForced +) + +var ( + migrateModeStrings = []string{"safe", "forced"} +) + +func (m MigrateMode) String() string { + return migrateModeStrings[m] +} + +type WriteMode int + +const ( + WriteModeOverwriteDeleteStale WriteMode = iota + WriteModeOverwrite + WriteModeAppend +) + +var ( + writeModeStrings = []string{"overwrite-delete-stale", "overwrite", "append"} +) + +func (m WriteMode) String() string { + return writeModeStrings[m] +} + +type Scheduler int + +const ( + SchedulerDFS Scheduler = iota + SchedulerRoundRobin +) + +var AllSchedulers = Schedulers{SchedulerDFS, SchedulerRoundRobin} +var AllSchedulerNames = [...]string{ + SchedulerDFS: "dfs", + SchedulerRoundRobin: "round-robin", +} + +type Schedulers []Scheduler + +func (s Schedulers) String() string { + var buffer bytes.Buffer + for i, scheduler := range s { + if i > 0 { + buffer.WriteString(", ") + } + buffer.WriteString(scheduler.String()) + } + return buffer.String() +} + +func (s Scheduler) String() string { + return AllSchedulerNames[s] +} + type GetTables func(ctx context.Context, c Client) (schema.Tables, error) type Option func(*Plugin) @@ -25,9 +87,9 @@ func WithNoInternalColumns() Option { } } -func WithUnmanaged() Option { +func WithUnmanagedSync() Option { return func(p *Plugin) { - p.unmanaged = true + p.unmanagedSync = true } } diff --git a/plugin/plugin.go b/plugin/plugin.go index dc254f0827..754fd424d6 100644 --- a/plugin/plugin.go +++ b/plugin/plugin.go @@ -7,17 +7,12 @@ import ( "time" "github.com/apache/arrow/go/v13/arrow" - "github.com/apache/arrow/go/v13/arrow/array" - "github.com/apache/arrow/go/v13/arrow/memory" - "github.com/cloudquery/plugin-pb-go/specs" + "github.com/cloudquery/plugin-pb-go/specs/v0" "github.com/cloudquery/plugin-sdk/v4/backend" "github.com/cloudquery/plugin-sdk/v4/caser" - "github.com/cloudquery/plugin-sdk/v4/scalar" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/rs/zerolog" "golang.org/x/sync/semaphore" - - pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" ) const ( @@ -32,14 +27,14 @@ type Options struct { type NewExecutionClientFunc func(context.Context, zerolog.Logger, specs.Source, Options) (schema.ClientMeta, error) -type NewClientFunc func(context.Context, zerolog.Logger, pbPlugin.Spec) (Client, error) +type NewClientFunc func(context.Context, zerolog.Logger, any) (Client, error) type Client interface { ID() string - Sync(ctx context.Context, metrics *Metrics, res chan<- arrow.Record) error - Migrate(ctx context.Context, tables schema.Tables) error - WriteTableBatch(ctx context.Context, table *schema.Table, data []arrow.Record) error - Write(ctx context.Context, tables schema.Tables, res <-chan arrow.Record) error + Sync(ctx context.Context, res chan<- arrow.Record) error + Migrate(ctx context.Context, tables schema.Tables, migrateMode MigrateMode) error + WriteTableBatch(ctx context.Context, table *schema.Table, writeMode WriteMode, data []arrow.Record) error + Write(ctx context.Context, tables schema.Tables, writeMode WriteMode, res <-chan arrow.Record) error DeleteStale(ctx context.Context, tables schema.Tables, sourceName string, syncTime time.Time) error Read(ctx context.Context, table *schema.Table, sourceName string, res chan<- arrow.Record) error Close(ctx context.Context) error @@ -65,7 +60,7 @@ func (UnimplementedWriter) DeleteStale(ctx context.Context, tables schema.Tables type UnimplementedSync struct{} -func (UnimplementedSync) Sync(ctx context.Context, metrics *Metrics, res chan<- arrow.Record) error { +func (UnimplementedSync) Sync(ctx context.Context, res chan<- arrow.Record) error { return fmt.Errorf("not implemented") } @@ -110,15 +105,17 @@ type Plugin struct { // backend is the backend used to store the cursor state backend backend.Backend // spec is the spec the client was initialized with - spec pbPlugin.Spec + spec any // NoInternalColumns if set to true will not add internal columns to tables such as _cq_id and _cq_parent_id // useful for sources such as PostgreSQL and other databases internalColumns bool - // unmanaged if set to true then the plugin will call Sync directly and not use the scheduler - unmanaged bool + // unmanagedSync if set to true then the plugin will call Sync directly and not use the scheduler + unmanagedSync bool // titleTransformer allows the plugin to control how table names get turned into titles for generated documentation - titleTransformer func(*schema.Table) string - syncTime time.Time + titleTransformer func(*schema.Table) string + syncTime time.Time + sourceName string + deterministicCQId bool managedWriter bool workers map[string]*worker @@ -146,7 +143,7 @@ func (p *Plugin) addInternalColumns(tables []*schema.Table) error { cqSourceName := schema.CqSourceNameColumn cqSyncTime := schema.CqSyncTimeColumn cqSourceName.Resolver = func(_ context.Context, _ schema.ClientMeta, resource *schema.Resource, c schema.Column) error { - return resource.Set(c.Name, p.spec.Name) + return resource.Set(c.Name, p.sourceName) } cqSyncTime.Resolver = func(_ context.Context, _ schema.ClientMeta, resource *schema.Resource, c schema.Column) error { return resource.Set(c.Name, p.syncTime) @@ -197,6 +194,8 @@ func maxDepth(tables schema.Tables) uint64 { return depth } +// NewPlugin returns a new CloudQuery Plugin with the given name, version and implementation. +// Depending on the options, it can be write only plugin, read only plugin or both. func NewPlugin(name string, version string, newClient NewClientFunc, options ...Option) *Plugin { p := Plugin{ name: name, @@ -206,6 +205,7 @@ func NewPlugin(name string, version string, newClient NewClientFunc, options ... titleTransformer: DefaultTitleTransformer, newClient: newClient, metrics: &Metrics{TableClient: make(map[string]map[string]*TableClientMetrics)}, + workers: make(map[string]*worker), workersLock: &sync.Mutex{}, batchTimeout: time.Duration(defaultBatchTimeoutSeconds) * time.Second, defaultBatchSize: defaultBatchSize, @@ -215,6 +215,10 @@ func NewPlugin(name string, version string, newClient NewClientFunc, options ... opt(&p) } if p.staticTables != nil { + setParents(p.staticTables, nil) + if err := transformTables(p.staticTables); err != nil { + panic(err) + } if p.internalColumns { if err := p.addInternalColumns(p.staticTables); err != nil { panic(err) @@ -246,66 +250,16 @@ func (p *Plugin) SetLogger(logger zerolog.Logger) { p.logger = logger.With().Str("module", p.name+"-src").Logger() } -// Tables returns all tables supported by this source plugin -func (p *Plugin) StaticTables() schema.Tables { - return p.staticTables -} - -func (p *Plugin) HasDynamicTables() bool { - return p.getDynamicTables != nil -} - -func (p *Plugin) DynamicTables() schema.Tables { - return p.sessionTables -} - -func (p *Plugin) readAll(ctx context.Context, table *schema.Table, sourceName string) ([]arrow.Record, error) { - var readErr error - ch := make(chan arrow.Record) - go func() { - defer close(ch) - readErr = p.Read(ctx, table, sourceName, ch) - }() - // nolint:prealloc - var resources []arrow.Record - for resource := range ch { - resources = append(resources, resource) - } - return resources, readErr -} - -func (p *Plugin) Read(ctx context.Context, table *schema.Table, sourceName string, res chan<- arrow.Record) error { - return p.client.Read(ctx, table, sourceName, res) -} - func (p *Plugin) Metrics() *Metrics { return p.metrics } -func (p *Plugin) setSpecDefaults(spec *pbPlugin.Spec) { - if spec.WriteSpec == nil { - spec.WriteSpec = &pbPlugin.WriteSpec{ - BatchSize: uint64(p.defaultBatchSize), - BatchSizeBytes: uint64(p.defaultBatchSizeBytes), - } - } - if spec.WriteSpec.BatchSize == 0 { - spec.WriteSpec.BatchSize = uint64(p.defaultBatchSize) - } - if spec.WriteSpec.BatchSizeBytes == 0 { - spec.WriteSpec.BatchSizeBytes = uint64(p.defaultBatchSizeBytes) - } - if spec.SyncSpec == nil { - spec.SyncSpec = &pbPlugin.SyncSpec{} - } -} - -func (p *Plugin) Init(ctx context.Context, spec pbPlugin.Spec) error { +// Init initializes the plugin with the given spec. +func (p *Plugin) Init(ctx context.Context, spec any) error { if !p.mu.TryLock() { return fmt.Errorf("plugin already in use") } defer p.mu.Unlock() - p.setSpecDefaults(&spec) var err error p.client, err = p.newClient(ctx, p.logger, spec) if err != nil { @@ -313,167 +267,6 @@ func (p *Plugin) Init(ctx context.Context, spec pbPlugin.Spec) error { } p.spec = spec - tables := p.staticTables - if p.getDynamicTables != nil { - tables, err = p.getDynamicTables(ctx, p.client) - if err != nil { - return fmt.Errorf("failed to get dynamic tables: %w", err) - } - - tables, err = tables.FilterDfs(spec.SyncSpec.Tables, spec.SyncSpec.SkipTables, true) - if err != nil { - return fmt.Errorf("failed to filter tables: %w", err) - } - if len(tables) == 0 { - return fmt.Errorf("no tables to sync - please check your spec 'tables' and 'skip_tables' settings") - } - - setParents(tables, nil) - if err := transformTables(tables); err != nil { - return err - } - if p.internalColumns { - if err := p.addInternalColumns(tables); err != nil { - return err - } - } - - p.maxDepth = maxDepth(tables) - if p.maxDepth > maxAllowedDepth { - return fmt.Errorf("max depth of tables is %d, max allowed is %d", p.maxDepth, maxAllowedDepth) - } - } else if tables != nil { - tables, err = tables.FilterDfs(spec.SyncSpec.Tables, spec.SyncSpec.SkipTables, true) - if err != nil { - return fmt.Errorf("failed to filter tables: %w", err) - } - } - p.sessionTables = tables - - return nil -} - -func (p *Plugin) Migrate(ctx context.Context, tables schema.Tables) error { - return p.client.Migrate(ctx, tables) -} - -func (p *Plugin) writeUnmanaged(ctx context.Context, _ specs.Source, tables schema.Tables, _ time.Time, res <-chan arrow.Record) error { - return p.client.Write(ctx, tables, res) -} - -// this function is currently used mostly for testing so it's not a public api -func (p *Plugin) writeOne(ctx context.Context, sourceSpec pbPlugin.Spec, syncTime time.Time, resource arrow.Record) error { - resources := []arrow.Record{resource} - return p.writeAll(ctx, sourceSpec, syncTime, resources) -} - -// this function is currently used mostly for testing so it's not a public api -func (p *Plugin) writeAll(ctx context.Context, sourceSpec pbPlugin.Spec, syncTime time.Time, resources []arrow.Record) error { - ch := make(chan arrow.Record, len(resources)) - for _, resource := range resources { - ch <- resource - } - close(ch) - tables := make(schema.Tables, 0) - tableNames := make(map[string]struct{}) - for _, resource := range resources { - sc := resource.Schema() - tableMD := sc.Metadata() - name, found := tableMD.GetValue(schema.MetadataTableName) - if !found { - return fmt.Errorf("missing table name") - } - if _, ok := tableNames[name]; ok { - continue - } - table, err := schema.NewTableFromArrowSchema(resource.Schema()) - if err != nil { - return err - } - tables = append(tables, table) - tableNames[table.Name] = struct{}{} - } - return p.Write(ctx, sourceSpec, tables, syncTime, ch) -} - -func (p *Plugin) Write(ctx context.Context, sourceSpec pbPlugin.Spec, tables schema.Tables, syncTime time.Time, res <-chan arrow.Record) error { - syncTime = syncTime.UTC() - if err := p.client.Write(ctx, tables, res); err != nil { - return err - } - if p.spec.WriteSpec.WriteMode == pbPlugin.WRITE_MODE_WRITE_MODE_OVERWRITE_DELETE_STALE { - tablesToDelete := tables - if sourceSpec.BackendSpec != nil { - tablesToDelete = make(schema.Tables, 0, len(tables)) - for _, t := range tables { - if !t.IsIncremental { - tablesToDelete = append(tablesToDelete, t) - } - } - } - if err := p.DeleteStale(ctx, tablesToDelete, sourceSpec.Name, syncTime); err != nil { - return err - } - } - return nil -} - -func (p *Plugin) DeleteStale(ctx context.Context, tables schema.Tables, sourceName string, syncTime time.Time) error { - syncTime = syncTime.UTC() - return p.client.DeleteStale(ctx, tables, sourceName, syncTime) -} - -func (p *Plugin) syncAll(ctx context.Context, syncTime time.Time, syncSpec pbPlugin.SyncSpec) ([]arrow.Record, error) { - var err error - ch := make(chan arrow.Record) - go func() { - defer close(ch) - err = p.Sync(ctx, syncTime, syncSpec, ch) - }() - // nolint:prealloc - var resources []arrow.Record - for resource := range ch { - resources = append(resources, resource) - } - return resources, err -} - -// Sync is syncing data from the requested tables in spec to the given channel -func (p *Plugin) Sync(ctx context.Context, syncTime time.Time, syncSpec pbPlugin.SyncSpec, res chan<- arrow.Record) error { - if !p.mu.TryLock() { - return fmt.Errorf("plugin already in use") - } - defer p.mu.Unlock() - p.syncTime = syncTime - - startTime := time.Now() - if p.unmanaged { - if err := p.client.Sync(ctx, p.metrics, res); err != nil { - return fmt.Errorf("failed to sync unmanaged client: %w", err) - } - } else { - resources := make(chan *schema.Resource) - go func() { - defer close(resources) - switch syncSpec.Scheduler { - case pbPlugin.SyncSpec_SCHEDULER_DFS: - p.syncDfs(ctx, syncSpec, p.client, p.sessionTables, resources) - case pbPlugin.SyncSpec_SCHEDULER_ROUND_ROBIN: - p.syncRoundRobin(ctx, syncSpec, p.client, p.sessionTables, resources) - default: - panic(fmt.Errorf("unknown scheduler %s. Options are: %v", syncSpec.Scheduler, specs.AllSchedulers.String())) - } - }() - for resource := range resources { - vector := resource.GetValues() - bldr := array.NewRecordBuilder(memory.DefaultAllocator, resource.Table.ToArrowSchema()) - scalar.AppendToRecordBuilder(bldr, vector) - rec := bldr.NewRecord() - res <- rec - } - } - - p.logger.Info().Uint64("resources", p.metrics.TotalResources()).Uint64("errors", p.metrics.TotalErrors()).Uint64("panics", p.metrics.TotalPanics()).TimeDiff("duration", time.Now(), startTime).Msg("sync finished") return nil } diff --git a/plugin/plugin_managed_source_test.go b/plugin/plugin_managed_source_test.go index cdaf02e616..3c55c7d4ae 100644 --- a/plugin/plugin_managed_source_test.go +++ b/plugin/plugin_managed_source_test.go @@ -146,7 +146,7 @@ func (*testExecutionClient) Read(ctx context.Context, table *schema.Table, sourc return fmt.Errorf("not implemented") } -func (*testExecutionClient) Sync(ctx context.Context, metrics *Metrics, res chan<- arrow.Record) error { +func (*testExecutionClient) Sync(ctx context.Context, res chan<- arrow.Record) error { return fmt.Errorf("not implemented") } @@ -356,7 +356,7 @@ func (testRand) Read(p []byte) (n int, err error) { return len(p), nil } -func TestSync(t *testing.T) { +func TestManagedSync(t *testing.T) { uuid.SetRand(testRand{}) for _, scheduler := range pbPlugin.SyncSpec_SCHEDULER_value { for _, tc := range syncTestCases { @@ -428,20 +428,29 @@ func testSyncTable(t *testing.T, tc syncTestCase, scheduler pbPlugin.SyncSpec_SC } } -func TestIgnoredColumns(t *testing.T) { - validateResources(t, schema.Resources{{ - Item: struct{ A *string }{}, - Table: &schema.Table{ - Columns: schema.ColumnList{ - { - Name: "a", - Type: arrow.BinaryTypes.String, - IgnoreInTests: true, - }, - }, - }, - }}) -} +// func TestIgnoredColumns(t *testing.T) { +// table := &schema.Table{ +// Columns: schema.ColumnList{ +// { +// Name: "a", +// Type: arrow.BinaryTypes.String, +// IgnoreInTests: true, +// }, +// }, +// } +// validateResources(t, table, schema.Resources{{ +// Item: struct{ A *string }{}, +// Table: &schema.Table{ +// Columns: schema.ColumnList{ +// { +// Name: "a", +// Type: arrow.BinaryTypes.String, +// IgnoreInTests: true, +// }, +// }, +// }, +// }}) +// } var testTable struct { PrimaryKey string diff --git a/plugin/plugin_reader.go b/plugin/plugin_reader.go new file mode 100644 index 0000000000..8fadf11f6a --- /dev/null +++ b/plugin/plugin_reader.go @@ -0,0 +1,110 @@ +package plugin + +import ( + "context" + "fmt" + "time" + + "github.com/apache/arrow/go/v13/arrow" + "github.com/apache/arrow/go/v13/arrow/array" + "github.com/apache/arrow/go/v13/arrow/memory" + "github.com/cloudquery/plugin-sdk/v4/scalar" + "github.com/cloudquery/plugin-sdk/v4/schema" +) + +type SyncOptions struct { + Tables []string + SkipTables []string + Concurrency int64 + Scheduler Scheduler + DeterministicCQID bool +} + +// Tables returns all tables supported by this source plugin +func (p *Plugin) StaticTables() schema.Tables { + return p.staticTables +} + +func (p *Plugin) HasDynamicTables() bool { + return p.getDynamicTables != nil +} + +func (p *Plugin) DynamicTables() schema.Tables { + return p.sessionTables +} + +func (p *Plugin) readAll(ctx context.Context, table *schema.Table, sourceName string) ([]arrow.Record, error) { + var readErr error + ch := make(chan arrow.Record) + go func() { + defer close(ch) + readErr = p.Read(ctx, table, sourceName, ch) + }() + // nolint:prealloc + var resources []arrow.Record + for resource := range ch { + resources = append(resources, resource) + } + return resources, readErr +} + +func (p *Plugin) Read(ctx context.Context, table *schema.Table, sourceName string, res chan<- arrow.Record) error { + return p.client.Read(ctx, table, sourceName, res) +} + +func (p *Plugin) syncAll(ctx context.Context, sourceName string, syncTime time.Time, options SyncOptions) ([]arrow.Record, error) { + var err error + ch := make(chan arrow.Record) + go func() { + defer close(ch) + err = p.Sync(ctx, sourceName, syncTime, options, ch) + }() + // nolint:prealloc + var resources []arrow.Record + for resource := range ch { + resources = append(resources, resource) + } + return resources, err +} + +// Sync is syncing data from the requested tables in spec to the given channel +func (p *Plugin) Sync(ctx context.Context, sourceName string, syncTime time.Time, syncOptions SyncOptions, res chan<- arrow.Record) error { + if !p.mu.TryLock() { + return fmt.Errorf("plugin already in use") + } + defer p.mu.Unlock() + p.syncTime = syncTime + + startTime := time.Now() + if p.unmanagedSync { + if err := p.client.Sync(ctx, res); err != nil { + return fmt.Errorf("failed to sync unmanaged client: %w", err) + } + } else { + if len(p.sessionTables) == 0 { + return fmt.Errorf("no tables to sync - please check your spec 'tables' and 'skip_tables' settings") + } + resources := make(chan *schema.Resource) + go func() { + defer close(resources) + switch syncOptions.Scheduler { + case SchedulerDFS: + p.syncDfs(ctx, syncOptions, p.client, p.sessionTables, resources) + case SchedulerRoundRobin: + p.syncRoundRobin(ctx, syncOptions, p.client, p.sessionTables, resources) + default: + panic(fmt.Errorf("unknown scheduler %s", syncOptions.Scheduler)) + } + }() + for resource := range resources { + vector := resource.GetValues() + bldr := array.NewRecordBuilder(memory.DefaultAllocator, resource.Table.ToArrowSchema()) + scalar.AppendToRecordBuilder(bldr, vector) + rec := bldr.NewRecord() + res <- rec + } + } + + p.logger.Info().Uint64("resources", p.metrics.TotalResources()).Uint64("errors", p.metrics.TotalErrors()).Uint64("panics", p.metrics.TotalPanics()).TimeDiff("duration", time.Now(), startTime).Msg("sync finished") + return nil +} diff --git a/plugin/plugin_round_robin_test.go b/plugin/plugin_round_robin_test.go deleted file mode 100644 index e24c15d108..0000000000 --- a/plugin/plugin_round_robin_test.go +++ /dev/null @@ -1,78 +0,0 @@ -package plugin - -import ( - "context" - "testing" - "time" - - "github.com/apache/arrow/go/v13/arrow/array" - pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" - "github.com/cloudquery/plugin-sdk/v4/schema" -) - -func TestPluginRoundRobin(t *testing.T) { - ctx := context.Background() - p := NewPlugin("test", "v0.0.0", NewMemDBClient, WithUnmanaged()) - testTable := schema.TestTable("test_table", schema.TestSourceOptions{}) - syncTime := time.Now().UTC() - testRecords := schema.GenTestData(testTable, schema.GenTestDataOptions{ - SourceName: "test", - SyncTime: syncTime, - MaxRows: 1, - }) - spec := pbPlugin.Spec{ - Name: "test", - Path: "cloudquery/test", - Version: "v1.0.0", - Registry: pbPlugin.Spec_REGISTRY_GITHUB, - WriteSpec: &pbPlugin.WriteSpec{}, - SyncSpec: &pbPlugin.SyncSpec{}, - } - if err := p.Init(ctx, spec); err != nil { - t.Fatal(err) - } - - if err := p.Migrate(ctx, schema.Tables{testTable}); err != nil { - t.Fatal(err) - } - if err := p.writeAll(ctx, spec, syncTime, testRecords); err != nil { - t.Fatal(err) - } - gotRecords, err := p.readAll(ctx, testTable, "test") - if err != nil { - t.Fatal(err) - } - if len(gotRecords) != len(testRecords) { - t.Fatalf("got %d records, want %d", len(gotRecords), len(testRecords)) - } - if !array.RecordEqual(testRecords[0], gotRecords[0]) { - t.Fatal("records are not equal") - } - records, err := p.syncAll(ctx, syncTime, *spec.SyncSpec) - if err != nil { - t.Fatal(err) - } - if len(records) != 1 { - t.Fatalf("got %d resources, want 1", len(records)) - } - - if !array.RecordEqual(testRecords[0], records[0]) { - t.Fatal("records are not equal") - } - - newSyncTime := time.Now().UTC() - if err := p.DeleteStale(ctx, schema.Tables{testTable}, "test", newSyncTime); err != nil { - t.Fatal(err) - } - records, err = p.syncAll(ctx, syncTime, *spec.SyncSpec) - if err != nil { - t.Fatal(err) - } - if len(records) != 0 { - t.Fatalf("got %d resources, want 0", len(records)) - } - - if err := p.Close(ctx); err != nil { - t.Fatal(err) - } -} diff --git a/plugin/plugin_test.go b/plugin/plugin_test.go new file mode 100644 index 0000000000..04993d5b97 --- /dev/null +++ b/plugin/plugin_test.go @@ -0,0 +1,97 @@ +package plugin + +import ( + "context" + "testing" + "time" + + "github.com/apache/arrow/go/v13/arrow/array" + "github.com/cloudquery/plugin-sdk/v4/schema" +) + +func TestPluginUnmanagedSync(t *testing.T) { + ctx := context.Background() + p := NewPlugin("test", "v0.0.0", NewMemDBClient, WithUnmanagedSync()) + testTable := schema.TestTable("test_table", schema.TestSourceOptions{}) + syncTime := time.Now().UTC() + sourceName := "test" + testRecords := schema.GenTestData(testTable, schema.GenTestDataOptions{ + SourceName: sourceName, + SyncTime: syncTime, + MaxRows: 1, + }) + if err := p.Init(ctx, nil); err != nil { + t.Fatal(err) + } + + if err := p.Migrate(ctx, schema.Tables{testTable}, MigrateModeSafe); err != nil { + t.Fatal(err) + } + if err := p.writeAll(ctx, sourceName, syncTime, WriteModeOverwrite, testRecords); err != nil { + t.Fatal(err) + } + gotRecords, err := p.readAll(ctx, testTable, "test") + if err != nil { + t.Fatal(err) + } + if len(gotRecords) != len(testRecords) { + t.Fatalf("got %d records, want %d", len(gotRecords), len(testRecords)) + } + if !array.RecordEqual(testRecords[0], gotRecords[0]) { + t.Fatal("records are not equal") + } + records, err := p.syncAll(ctx, sourceName, syncTime, SyncOptions{}) + if err != nil { + t.Fatal(err) + } + if len(records) != 1 { + t.Fatalf("got %d resources, want 1", len(records)) + } + + if !array.RecordEqual(testRecords[0], records[0]) { + t.Fatal("records are not equal") + } + + newSyncTime := time.Now().UTC() + if err := p.DeleteStale(ctx, schema.Tables{testTable}, "test", newSyncTime); err != nil { + t.Fatal(err) + } + records, err = p.syncAll(ctx, sourceName, syncTime, SyncOptions{}) + if err != nil { + t.Fatal(err) + } + if len(records) != 0 { + t.Fatalf("got %d resources, want 0", len(records)) + } + + if err := p.Close(ctx); err != nil { + t.Fatal(err) + } +} + +// func TestPluginInit(t *testing.T) { +// const ( +// batchSize = uint64(100) +// batchSizeBytes = uint64(1000) +// ) + +// var ( +// batchSizeObserved uint64 +// batchSizeBytesObserved uint64 +// ) +// p := NewPlugin( +// "test", +// "development", +// func(ctx context.Context, logger zerolog.Logger, s any) (Client, error) { +// batchSizeObserved = s.WriteSpec.BatchSize +// batchSizeBytesObserved = s.WriteSpec.BatchSizeBytes +// return NewMemDBClient(ctx, logger, s) +// }, +// WithDefaultBatchSize(int(batchSize)), +// WithDefaultBatchSizeBytes(int(batchSizeBytes)), +// ) +// require.NoError(t, p.Init(context.TODO(), nil)) + +// require.Equal(t, batchSize, batchSizeObserved) +// require.Equal(t, batchSizeBytes, batchSizeBytesObserved) +// } diff --git a/plugin/plugin_writer.go b/plugin/plugin_writer.go new file mode 100644 index 0000000000..4f13e1bdab --- /dev/null +++ b/plugin/plugin_writer.go @@ -0,0 +1,69 @@ +package plugin + +import ( + "context" + "fmt" + "time" + + "github.com/apache/arrow/go/v13/arrow" + "github.com/cloudquery/plugin-sdk/v4/schema" +) + +func (p *Plugin) Migrate(ctx context.Context, tables schema.Tables, migrateMode MigrateMode) error { + return p.client.Migrate(ctx, tables, migrateMode) +} + +// this function is currently used mostly for testing so it's not a public api +func (p *Plugin) writeOne(ctx context.Context, sourceName string, syncTime time.Time, writeMode WriteMode, resource arrow.Record) error { + resources := []arrow.Record{resource} + return p.writeAll(ctx, sourceName, syncTime, writeMode, resources) +} + +// this function is currently used mostly for testing so it's not a public api +func (p *Plugin) writeAll(ctx context.Context, sourceName string, syncTime time.Time, writeMode WriteMode, resources []arrow.Record) error { + ch := make(chan arrow.Record, len(resources)) + for _, resource := range resources { + ch <- resource + } + close(ch) + tables := make(schema.Tables, 0) + tableNames := make(map[string]struct{}) + for _, resource := range resources { + sc := resource.Schema() + tableMD := sc.Metadata() + name, found := tableMD.GetValue(schema.MetadataTableName) + if !found { + return fmt.Errorf("missing table name") + } + if _, ok := tableNames[name]; ok { + continue + } + table, err := schema.NewTableFromArrowSchema(resource.Schema()) + if err != nil { + return err + } + tables = append(tables, table) + tableNames[table.Name] = struct{}{} + } + return p.Write(ctx, sourceName, tables, syncTime, writeMode, ch) +} + +func (p *Plugin) Write(ctx context.Context, sourceName string, tables schema.Tables, syncTime time.Time, writeMode WriteMode, res <-chan arrow.Record) error { + syncTime = syncTime.UTC() + if p.managedWriter { + if err := p.writeManagedTableBatch(ctx, tables, writeMode, res); err != nil { + return err + } + } else { + if err := p.client.Write(ctx, tables, writeMode, res); err != nil { + return err + } + } + + return nil +} + +func (p *Plugin) DeleteStale(ctx context.Context, tables schema.Tables, sourceName string, syncTime time.Time) error { + syncTime = syncTime.UTC() + return p.client.DeleteStale(ctx, tables, sourceName, syncTime) +} diff --git a/plugin/scheduler_dfs.go b/plugin/scheduler_dfs.go index 9b592be865..ae074503ab 100644 --- a/plugin/scheduler_dfs.go +++ b/plugin/scheduler_dfs.go @@ -8,17 +8,16 @@ import ( "sync" "sync/atomic" - pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" "github.com/cloudquery/plugin-sdk/v4/helpers" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/getsentry/sentry-go" "golang.org/x/sync/semaphore" ) -func (p *Plugin) syncDfs(ctx context.Context, spec pbPlugin.SyncSpec, client Client, tables schema.Tables, resolvedResources chan<- *schema.Resource) { +func (p *Plugin) syncDfs(ctx context.Context, options SyncOptions, client Client, tables schema.Tables, resolvedResources chan<- *schema.Resource) { // This is very similar to the concurrent web crawler problem with some minor changes. // We are using DFS to make sure memory usage is capped at O(h) where h is the height of the tree. - tableConcurrency := max(spec.Concurrency/minResourceConcurrency, minTableConcurrency) + tableConcurrency := max(uint64(options.Concurrency/minResourceConcurrency), minTableConcurrency) resourceConcurrency := tableConcurrency * minResourceConcurrency p.tableSems = make([]*semaphore.Weighted, p.maxDepth) @@ -173,7 +172,7 @@ func (p *Plugin) resolveResourcesDfs(ctx context.Context, table *schema.Table, c return } - if err := resolvedResource.CalculateCQID(p.spec.SyncSpec.DetrministicCqId); err != nil { + if err := resolvedResource.CalculateCQID(p.deterministicCQId); err != nil { tableMetrics := p.metrics.TableClient[table.Name][client.ID()] p.logger.Error().Err(err).Str("table", table.Name).Str("client", client.ID()).Msg("resource resolver finished with primary key calculation error") if _, found := sentValidationErrors.LoadOrStore(table.Name, struct{}{}); !found { diff --git a/plugin/scheduler_round_robin.go b/plugin/scheduler_round_robin.go index 5c6e90b391..b4c7592fcf 100644 --- a/plugin/scheduler_round_robin.go +++ b/plugin/scheduler_round_robin.go @@ -4,7 +4,6 @@ import ( "context" "sync" - pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" "github.com/cloudquery/plugin-sdk/v4/schema" "golang.org/x/sync/semaphore" ) @@ -14,8 +13,8 @@ type tableClient struct { client schema.ClientMeta } -func (p *Plugin) syncRoundRobin(ctx context.Context, spec pbPlugin.SyncSpec, client Client, tables schema.Tables, resolvedResources chan<- *schema.Resource) { - tableConcurrency := max(spec.Concurrency/minResourceConcurrency, minTableConcurrency) +func (p *Plugin) syncRoundRobin(ctx context.Context, options SyncOptions, client Client, tables schema.Tables, resolvedResources chan<- *schema.Resource) { + tableConcurrency := max(uint64(options.Concurrency/minResourceConcurrency), minTableConcurrency) resourceConcurrency := tableConcurrency * minResourceConcurrency p.tableSems = make([]*semaphore.Weighted, p.maxDepth) diff --git a/plugin/testing_overwrite_deletestale.go b/plugin/testing_overwrite_deletestale.go index 3b2266d080..300900e287 100644 --- a/plugin/testing_overwrite_deletestale.go +++ b/plugin/testing_overwrite_deletestale.go @@ -6,19 +6,18 @@ import ( "time" "github.com/apache/arrow/go/v13/arrow/array" - pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/cloudquery/plugin-sdk/v4/types" "github.com/google/uuid" "github.com/rs/zerolog" ) -func (*PluginTestSuite) destinationPluginTestWriteOverwriteDeleteStale(ctx context.Context, p *Plugin, logger zerolog.Logger, spec pbPlugin.Spec, testOpts PluginTestSuiteRunnerOptions) error { - spec.WriteSpec.WriteMode = pbPlugin.WRITE_MODE_WRITE_MODE_OVERWRITE_DELETE_STALE +func (*PluginTestSuite) destinationPluginTestWriteOverwriteDeleteStale(ctx context.Context, p *Plugin, logger zerolog.Logger, spec any, testOpts PluginTestSuiteRunnerOptions) error { + writeMode := WriteModeOverwriteDeleteStale if err := p.Init(ctx, spec); err != nil { return fmt.Errorf("failed to init plugin: %w", err) } - tableName := fmt.Sprintf("cq_%s_%d", spec.Name, time.Now().Unix()) + tableName := fmt.Sprintf("cq_overwrite_delete_stale_%d", time.Now().Unix()) table := schema.TestTable(tableName, testOpts.TestSourceOptions) incTable := schema.TestTable(tableName+"_incremental", testOpts.TestSourceOptions) incTable.IsIncremental = true @@ -27,18 +26,11 @@ func (*PluginTestSuite) destinationPluginTestWriteOverwriteDeleteStale(ctx conte table, incTable, } - if err := p.Migrate(ctx, tables); err != nil { + if err := p.Migrate(ctx, tables, MigrateModeSafe); err != nil { return fmt.Errorf("failed to migrate tables: %w", err) } sourceName := "testOverwriteSource" + uuid.NewString() - sourceSpec := pbPlugin.Spec{ - Name: sourceName, - BackendSpec: &pbPlugin.Spec{ - Name: "local", - Path: "cloudquery/local", - }, - } opts := schema.GenTestDataOptions{ SourceName: sourceName, @@ -50,7 +42,7 @@ func (*PluginTestSuite) destinationPluginTestWriteOverwriteDeleteStale(ctx conte incResources := schema.GenTestData(incTable, opts) allResources := resources allResources = append(allResources, incResources...) - if err := p.writeAll(ctx, sourceSpec, syncTime, allResources); err != nil { + if err := p.writeAll(ctx, sourceName, syncTime, writeMode, allResources); err != nil { return fmt.Errorf("failed to write all: %w", err) } sortRecordsBySyncTime(table, resources) @@ -103,7 +95,7 @@ func (*PluginTestSuite) destinationPluginTestWriteOverwriteDeleteStale(ctx conte allUpdatedResources := updatedResources allUpdatedResources = append(allUpdatedResources, updatedIncResources...) - if err := p.writeAll(ctx, sourceSpec, secondSyncTime, allUpdatedResources); err != nil { + if err := p.writeAll(ctx, sourceName, secondSyncTime, writeMode, allUpdatedResources); err != nil { return fmt.Errorf("failed to write all second time: %w", err) } diff --git a/plugin/testing_sync.go b/plugin/testing_sync.go index 0c0d6f939e..59b1cd5a9e 100644 --- a/plugin/testing_sync.go +++ b/plugin/testing_sync.go @@ -9,13 +9,12 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/array" - pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" "github.com/cloudquery/plugin-sdk/v4/schema" ) -type Validator func(t *testing.T, plugin *Plugin, resources []*schema.Resource) +type Validator func(t *testing.T, plugin *Plugin, resources []arrow.Record) -func TestPluginSync(t *testing.T, plugin *Plugin, spec pbPlugin.Spec, opts ...TestPluginOption) { +func TestPluginSync(t *testing.T, plugin *Plugin, sourceName string, spec any, options SyncOptions, opts ...TestPluginOption) { t.Helper() o := &testPluginOptions{ @@ -38,7 +37,7 @@ func TestPluginSync(t *testing.T, plugin *Plugin, spec pbPlugin.Spec, opts ...Te go func() { defer close(resourcesChannel) - syncErr = plugin.Sync(context.Background(), time.Now(), *spec.SyncSpec, resourcesChannel) + syncErr = plugin.Sync(context.Background(), sourceName, time.Now(), options, resourcesChannel) }() syncedResources := make([]arrow.Record, 0) @@ -48,9 +47,9 @@ func TestPluginSync(t *testing.T, plugin *Plugin, spec pbPlugin.Spec, opts ...Te if syncErr != nil { t.Fatal(syncErr) } - // for _, validator := range o.validators { - // validator(t, plugin, syncedResources) - // } + for _, validator := range o.validators { + validator(t, plugin, syncedResources) + } } type TestPluginOption func(*testPluginOptions) @@ -72,13 +71,18 @@ type testPluginOptions struct { validators []Validator } -func getTableResources(t *testing.T, table *schema.Table, resources []*schema.Resource) []*schema.Resource { +func getTableResources(t *testing.T, table *schema.Table, resources []arrow.Record) []arrow.Record { t.Helper() - tableResources := make([]*schema.Resource, 0) + tableResources := make([]arrow.Record, 0) for _, resource := range resources { - if resource.Table.Name == table.Name { + md := resource.Schema().Metadata() + tableName, ok := md.GetValue(schema.MetadataTableName) + if !ok { + t.Errorf("Expected table name to be set in metadata") + } + if tableName == table.Name { tableResources = append(tableResources, resource) } } @@ -86,17 +90,17 @@ func getTableResources(t *testing.T, table *schema.Table, resources []*schema.Re return tableResources } -func validateTable(t *testing.T, table *schema.Table, resources []*schema.Resource) { +func validateTable(t *testing.T, table *schema.Table, resources []arrow.Record) { t.Helper() tableResources := getTableResources(t, table, resources) if len(tableResources) == 0 { t.Errorf("Expected table %s to be synced but it was not found", table.Name) return } - validateResources(t, tableResources) + validateResources(t, table, tableResources) } -func validatePlugin(t *testing.T, plugin *Plugin, resources []*schema.Resource) { +func validatePlugin(t *testing.T, plugin *Plugin, resources []arrow.Record) { t.Helper() tables := extractTables(plugin.staticTables) for _, table := range tables { @@ -115,21 +119,18 @@ func extractTables(tables schema.Tables) []*schema.Table { // Validates that every column has at least one non-nil value. // Also does some additional validations. -func validateResources(t *testing.T, resources []*schema.Resource) { +func validateResources(t *testing.T, table *schema.Table, resources []arrow.Record) { t.Helper() - table := resources[0].Table - // A set of column-names that have values in at least one of the resources. columnsWithValues := make([]bool, len(table.Columns)) for _, resource := range resources { - for i, value := range resource.GetValues() { - if value == nil { - continue - } - if value.IsValid() { - columnsWithValues[i] = true + for _, arr := range resource.Columns() { + for i := 0; i < arr.Len(); i++ { + if arr.IsValid(i) { + columnsWithValues[i] = true + } } } } diff --git a/plugin/testing_write.go b/plugin/testing_write.go index 8f2b3da285..501ff39273 100644 --- a/plugin/testing_write.go +++ b/plugin/testing_write.go @@ -10,7 +10,6 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/array" - pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/cloudquery/plugin-sdk/v4/types" "github.com/rs/zerolog" @@ -22,11 +21,11 @@ type PluginTestSuite struct { // MigrateStrategy defines which tests we should include type MigrateStrategy struct { - AddColumn pbPlugin.WriteSpec_MIGRATE_MODE - AddColumnNotNull pbPlugin.WriteSpec_MIGRATE_MODE - RemoveColumn pbPlugin.WriteSpec_MIGRATE_MODE - RemoveColumnNotNull pbPlugin.WriteSpec_MIGRATE_MODE - ChangeColumn pbPlugin.WriteSpec_MIGRATE_MODE + AddColumn MigrateMode + AddColumnNotNull MigrateMode + RemoveColumn MigrateMode + RemoveColumnNotNull MigrateMode + ChangeColumn MigrateMode } type PluginTestSuiteTests struct { @@ -167,12 +166,8 @@ func WithTestSourceSkipDecimals() func(o *PluginTestSuiteRunnerOptions) { } } -func PluginTestSuiteRunner(t *testing.T, newPlugin NewPluginFunc, destSpec pbPlugin.Spec, tests PluginTestSuiteTests, testOptions ...func(o *PluginTestSuiteRunnerOptions)) { +func PluginTestSuiteRunner(t *testing.T, newPlugin NewPluginFunc, spec any, tests PluginTestSuiteTests, testOptions ...func(o *PluginTestSuiteRunnerOptions)) { t.Helper() - destSpec.Name = "testsuite" - if destSpec.WriteSpec == nil { - destSpec.WriteSpec = &pbPlugin.WriteSpec{} - } suite := &PluginTestSuite{ tests: tests, } @@ -194,9 +189,8 @@ func PluginTestSuiteRunner(t *testing.T, newPlugin NewPluginFunc, destSpec pbPlu if suite.tests.SkipOverwrite { t.Skip("skipping " + t.Name()) } - destSpec.Name = "test_write_overwrite" p := newPlugin() - if err := suite.destinationPluginTestWriteOverwrite(ctx, p, logger, destSpec, opts); err != nil { + if err := suite.destinationPluginTestWriteOverwrite(ctx, p, logger, spec, opts); err != nil { t.Fatal(err) } if err := p.Close(ctx); err != nil { @@ -209,9 +203,8 @@ func PluginTestSuiteRunner(t *testing.T, newPlugin NewPluginFunc, destSpec pbPlu if suite.tests.SkipOverwrite || suite.tests.SkipDeleteStale { t.Skip("skipping " + t.Name()) } - destSpec.Name = "test_write_overwrite_delete_stale" p := newPlugin() - if err := suite.destinationPluginTestWriteOverwriteDeleteStale(ctx, p, logger, destSpec, opts); err != nil { + if err := suite.destinationPluginTestWriteOverwriteDeleteStale(ctx, p, logger, spec, opts); err != nil { t.Fatal(err) } if err := p.Close(ctx); err != nil { @@ -224,10 +217,9 @@ func PluginTestSuiteRunner(t *testing.T, newPlugin NewPluginFunc, destSpec pbPlu if suite.tests.SkipMigrateOverwrite { t.Skip("skipping " + t.Name()) } - destSpec.WriteSpec.WriteMode = pbPlugin.WRITE_MODE_WRITE_MODE_OVERWRITE - destSpec.WriteSpec.MigrateMode = pbPlugin.WriteSpec_SAFE - destSpec.Name = "test_migrate_overwrite" - suite.destinationPluginTestMigrate(ctx, t, newPlugin, logger, destSpec, tests.MigrateStrategyOverwrite, opts) + migrateMode := MigrateModeSafe + writeMode := WriteModeOverwrite + suite.destinationPluginTestMigrate(ctx, t, newPlugin, logger, migrateMode, writeMode, tests.MigrateStrategyOverwrite, opts) }) t.Run("TestMigrateOverwriteForce", func(t *testing.T) { @@ -235,10 +227,9 @@ func PluginTestSuiteRunner(t *testing.T, newPlugin NewPluginFunc, destSpec pbPlu if suite.tests.SkipMigrateOverwriteForce { t.Skip("skipping " + t.Name()) } - destSpec.WriteSpec.WriteMode = pbPlugin.WRITE_MODE_WRITE_MODE_OVERWRITE - destSpec.WriteSpec.MigrateMode = pbPlugin.WriteSpec_FORCE - destSpec.Name = "test_migrate_overwrite_force" - suite.destinationPluginTestMigrate(ctx, t, newPlugin, logger, destSpec, tests.MigrateStrategyOverwrite, opts) + migrateMode := MigrateModeForced + writeMode := WriteModeOverwrite + suite.destinationPluginTestMigrate(ctx, t, newPlugin, logger, migrateMode, writeMode, tests.MigrateStrategyOverwrite, opts) }) t.Run("TestWriteAppend", func(t *testing.T) { @@ -246,9 +237,10 @@ func PluginTestSuiteRunner(t *testing.T, newPlugin NewPluginFunc, destSpec pbPlu if suite.tests.SkipAppend { t.Skip("skipping " + t.Name()) } - destSpec.Name = "test_write_append" + migrateMode := MigrateModeSafe + writeMode := WriteModeOverwrite p := newPlugin() - if err := suite.destinationPluginTestWriteAppend(ctx, p, logger, destSpec, opts); err != nil { + if err := suite.destinationPluginTestWriteAppend(ctx, p, logger, migrateMode, writeMode, opts); err != nil { t.Fatal(err) } if err := p.Close(ctx); err != nil { @@ -261,10 +253,9 @@ func PluginTestSuiteRunner(t *testing.T, newPlugin NewPluginFunc, destSpec pbPlu if suite.tests.SkipMigrateAppend { t.Skip("skipping " + t.Name()) } - destSpec.WriteSpec.WriteMode = pbPlugin.WRITE_MODE_WRITE_MODE_APPEND - destSpec.WriteSpec.MigrateMode = pbPlugin.WriteSpec_SAFE - destSpec.Name = "test_migrate_append" - suite.destinationPluginTestMigrate(ctx, t, newPlugin, logger, destSpec, tests.MigrateStrategyAppend, opts) + migrateMode := MigrateModeSafe + writeMode := WriteModeAppend + suite.destinationPluginTestMigrate(ctx, t, newPlugin, logger, migrateMode, writeMode, tests.MigrateStrategyAppend, opts) }) t.Run("TestMigrateAppendForce", func(t *testing.T) { @@ -272,10 +263,9 @@ func PluginTestSuiteRunner(t *testing.T, newPlugin NewPluginFunc, destSpec pbPlu if suite.tests.SkipMigrateAppendForce { t.Skip("skipping " + t.Name()) } - destSpec.WriteSpec.WriteMode = pbPlugin.WRITE_MODE_WRITE_MODE_APPEND - destSpec.WriteSpec.MigrateMode = pbPlugin.WriteSpec_FORCE - destSpec.Name = "test_migrate_append_force" - suite.destinationPluginTestMigrate(ctx, t, newPlugin, logger, destSpec, tests.MigrateStrategyAppend, opts) + migrateMode := MigrateModeForced + writeMode := WriteModeAppend + suite.destinationPluginTestMigrate(ctx, t, newPlugin, logger, migrateMode, writeMode, tests.MigrateStrategyAppend, opts) }) } diff --git a/plugin/testing_write_append.go b/plugin/testing_write_append.go index 4720431062..68efc4ca8b 100644 --- a/plugin/testing_write_append.go +++ b/plugin/testing_write_append.go @@ -6,31 +6,26 @@ import ( "time" "github.com/apache/arrow/go/v13/arrow/array" - pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/google/uuid" "github.com/rs/zerolog" ) -func (s *PluginTestSuite) destinationPluginTestWriteAppend(ctx context.Context, p *Plugin, logger zerolog.Logger, spec pbPlugin.Spec, testOpts PluginTestSuiteRunnerOptions) error { - spec.WriteSpec.WriteMode = pbPlugin.WRITE_MODE_WRITE_MODE_APPEND - if err := p.Init(ctx, spec); err != nil { +func (s *PluginTestSuite) destinationPluginTestWriteAppend(ctx context.Context, p *Plugin, logger zerolog.Logger, migrateMode MigrateMode, writeMode WriteMode, testOpts PluginTestSuiteRunnerOptions) error { + if err := p.Init(ctx, nil); err != nil { return fmt.Errorf("failed to init plugin: %w", err) } - tableName := fmt.Sprintf("cq_%s_%d", spec.Name, time.Now().Unix()) + tableName := fmt.Sprintf("cq_write_append_%d", time.Now().Unix()) table := schema.TestTable(tableName, testOpts.TestSourceOptions) syncTime := time.Now().UTC().Round(1 * time.Second) tables := schema.Tables{ table, } - if err := p.Migrate(ctx, tables); err != nil { + if err := p.Migrate(ctx, tables, migrateMode); err != nil { return fmt.Errorf("failed to migrate tables: %w", err) } sourceName := "testAppendSource" + uuid.NewString() - specSource := pbPlugin.Spec{ - Name: sourceName, - } opts := schema.GenTestDataOptions{ SourceName: sourceName, @@ -39,7 +34,7 @@ func (s *PluginTestSuite) destinationPluginTestWriteAppend(ctx context.Context, TimePrecision: testOpts.TimePrecision, } record1 := schema.GenTestData(table, opts) - if err := p.writeAll(ctx, specSource, syncTime, record1); err != nil { + if err := p.writeAll(ctx, sourceName, syncTime, writeMode, record1); err != nil { return fmt.Errorf("failed to write record first time: %w", err) } @@ -50,7 +45,7 @@ func (s *PluginTestSuite) destinationPluginTestWriteAppend(ctx context.Context, if !s.tests.SkipSecondAppend { // write second time - if err := p.writeAll(ctx, specSource, secondSyncTime, record2); err != nil { + if err := p.writeAll(ctx, sourceName, secondSyncTime, writeMode, record2); err != nil { return fmt.Errorf("failed to write one second time: %w", err) } } diff --git a/plugin/testing_write_migrate.go b/plugin/testing_write_migrate.go index 9ac2021866..d59da7fc8b 100644 --- a/plugin/testing_write_migrate.go +++ b/plugin/testing_write_migrate.go @@ -9,7 +9,6 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/array" - pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/cloudquery/plugin-sdk/v4/types" "github.com/google/uuid" @@ -21,19 +20,16 @@ func tableUUIDSuffix() string { return strings.ReplaceAll(uuid.NewString(), "-", "_") } -func testMigration(ctx context.Context, _ *testing.T, p *Plugin, logger zerolog.Logger, spec pbPlugin.Spec, target *schema.Table, source *schema.Table, mode pbPlugin.WriteSpec_MIGRATE_MODE, testOpts PluginTestSuiteRunnerOptions) error { - if err := p.Init(ctx, spec); err != nil { +func testMigration(ctx context.Context, _ *testing.T, p *Plugin, logger zerolog.Logger, target *schema.Table, source *schema.Table, mode MigrateMode, writeMode WriteMode, testOpts PluginTestSuiteRunnerOptions) error { + if err := p.Init(ctx, nil); err != nil { return fmt.Errorf("failed to init plugin: %w", err) } - if err := p.Migrate(ctx, schema.Tables{source}); err != nil { + if err := p.Migrate(ctx, schema.Tables{source}, mode); err != nil { return fmt.Errorf("failed to migrate tables: %w", err) } sourceName := target.Name - sourceSpec := pbPlugin.Spec{ - Name: sourceName, - } syncTime := time.Now().UTC().Round(1 * time.Second) opts := schema.GenTestDataOptions{ SourceName: sourceName, @@ -42,16 +38,16 @@ func testMigration(ctx context.Context, _ *testing.T, p *Plugin, logger zerolog. TimePrecision: testOpts.TimePrecision, } resource1 := schema.GenTestData(source, opts)[0] - if err := p.writeOne(ctx, sourceSpec, syncTime, resource1); err != nil { + if err := p.writeOne(ctx, sourceName, syncTime, writeMode, resource1); err != nil { return fmt.Errorf("failed to write one: %w", err) } - if err := p.Migrate(ctx, schema.Tables{target}); err != nil { + if err := p.Migrate(ctx, schema.Tables{target}, mode); err != nil { return fmt.Errorf("failed to migrate existing table: %w", err) } opts.SyncTime = syncTime.Add(time.Second).UTC() resource2 := schema.GenTestData(target, opts) - if err := p.writeAll(ctx, sourceSpec, syncTime, resource2); err != nil { + if err := p.writeAll(ctx, sourceName, syncTime, writeMode, resource2); err != nil { return fmt.Errorf("failed to write one after migration: %w", err) } @@ -65,7 +61,7 @@ func testMigration(ctx context.Context, _ *testing.T, p *Plugin, logger zerolog. return fmt.Errorf("failed to read all: %w", err) } sortRecordsBySyncTime(target, resourcesRead) - if mode == pbPlugin.WriteSpec_SAFE { + if mode == MigrateModeSafe { if len(resourcesRead) != 2 { return fmt.Errorf("expected 2 resources after write, got %d", len(resourcesRead)) } @@ -91,14 +87,13 @@ func (*PluginTestSuite) destinationPluginTestMigrate( t *testing.T, newPlugin NewPluginFunc, logger zerolog.Logger, - spec pbPlugin.Spec, + migrateMode MigrateMode, + writeMode WriteMode, strategy MigrateStrategy, testOpts PluginTestSuiteRunnerOptions, ) { - spec.WriteSpec.BatchSize = 1 - t.Run("add_column", func(t *testing.T) { - if strategy.AddColumn == pbPlugin.WriteSpec_FORCE && spec.WriteSpec.MigrateMode == pbPlugin.WriteSpec_SAFE { + if strategy.AddColumn == MigrateModeForced && migrateMode == MigrateModeSafe { t.Skip("skipping as migrate mode is safe") return } @@ -125,7 +120,7 @@ func (*PluginTestSuite) destinationPluginTestMigrate( } p := newPlugin() - if err := testMigration(ctx, t, p, logger, spec, target, source, strategy.AddColumn, testOpts); err != nil { + if err := testMigration(ctx, t, p, logger, target, source, strategy.AddColumn, writeMode, testOpts); err != nil { t.Fatalf("failed to migrate %s: %v", tableName, err) } if err := p.Close(ctx); err != nil { @@ -134,7 +129,7 @@ func (*PluginTestSuite) destinationPluginTestMigrate( }) t.Run("add_column_not_null", func(t *testing.T) { - if strategy.AddColumnNotNull == pbPlugin.WriteSpec_FORCE && spec.WriteSpec.MigrateMode == pbPlugin.WriteSpec_SAFE { + if strategy.AddColumnNotNull == MigrateModeForced && migrateMode == MigrateModeSafe { t.Skip("skipping as migrate mode is safe") return } @@ -159,7 +154,7 @@ func (*PluginTestSuite) destinationPluginTestMigrate( {Name: "bool", Type: arrow.FixedWidthTypes.Boolean, NotNull: true}, }} p := newPlugin() - if err := testMigration(ctx, t, p, logger, spec, target, source, strategy.AddColumnNotNull, testOpts); err != nil { + if err := testMigration(ctx, t, p, logger, target, source, strategy.AddColumnNotNull, writeMode, testOpts); err != nil { t.Fatalf("failed to migrate add_column_not_null: %v", err) } if err := p.Close(ctx); err != nil { @@ -168,7 +163,7 @@ func (*PluginTestSuite) destinationPluginTestMigrate( }) t.Run("remove_column", func(t *testing.T) { - if strategy.RemoveColumn == pbPlugin.WriteSpec_FORCE && spec.WriteSpec.MigrateMode == pbPlugin.WriteSpec_SAFE { + if strategy.RemoveColumn == MigrateModeForced && migrateMode == MigrateModeSafe { t.Skip("skipping as migrate mode is safe") return } @@ -192,7 +187,7 @@ func (*PluginTestSuite) destinationPluginTestMigrate( }} p := newPlugin() - if err := testMigration(ctx, t, p, logger, spec, target, source, strategy.RemoveColumn, testOpts); err != nil { + if err := testMigration(ctx, t, p, logger, target, source, strategy.RemoveColumn, writeMode, testOpts); err != nil { t.Fatalf("failed to migrate remove_column: %v", err) } if err := p.Close(ctx); err != nil { @@ -201,7 +196,7 @@ func (*PluginTestSuite) destinationPluginTestMigrate( }) t.Run("remove_column_not_null", func(t *testing.T) { - if strategy.RemoveColumnNotNull == pbPlugin.WriteSpec_FORCE && spec.WriteSpec.MigrateMode == pbPlugin.WriteSpec_SAFE { + if strategy.RemoveColumnNotNull == MigrateModeForced && migrateMode == MigrateModeSafe { t.Skip("skipping as migrate mode is safe") return } @@ -226,7 +221,7 @@ func (*PluginTestSuite) destinationPluginTestMigrate( }} p := newPlugin() - if err := testMigration(ctx, t, p, logger, spec, target, source, strategy.RemoveColumnNotNull, testOpts); err != nil { + if err := testMigration(ctx, t, p, logger, target, source, strategy.RemoveColumnNotNull, writeMode, testOpts); err != nil { t.Fatalf("failed to migrate remove_column_not_null: %v", err) } if err := p.Close(ctx); err != nil { @@ -235,7 +230,7 @@ func (*PluginTestSuite) destinationPluginTestMigrate( }) t.Run("change_column", func(t *testing.T) { - if strategy.ChangeColumn == pbPlugin.WriteSpec_FORCE && spec.WriteSpec.MigrateMode == pbPlugin.WriteSpec_SAFE { + if strategy.ChangeColumn == MigrateModeForced && migrateMode == MigrateModeSafe { t.Skip("skipping as migrate mode is safe") return } @@ -260,7 +255,7 @@ func (*PluginTestSuite) destinationPluginTestMigrate( }} p := newPlugin() - if err := testMigration(ctx, t, p, logger, spec, target, source, strategy.ChangeColumn, testOpts); err != nil { + if err := testMigration(ctx, t, p, logger, target, source, strategy.ChangeColumn, writeMode, testOpts); err != nil { t.Fatalf("failed to migrate change_column: %v", err) } if err := p.Close(ctx); err != nil { @@ -273,12 +268,10 @@ func (*PluginTestSuite) destinationPluginTestMigrate( table := schema.TestTable(tableName, testOpts.TestSourceOptions) p := newPlugin() - require.NoError(t, p.Init(ctx, spec)) - require.NoError(t, p.Migrate(ctx, schema.Tables{table})) + require.NoError(t, p.Init(ctx, nil)) + require.NoError(t, p.Migrate(ctx, schema.Tables{table}, MigrateModeSafe)) - nonForced := spec - nonForced.WriteSpec.MigrateMode = pbPlugin.WriteSpec_SAFE - require.NoError(t, p.Init(ctx, nonForced)) - require.NoError(t, p.Migrate(ctx, schema.Tables{table})) + require.NoError(t, p.Init(ctx, MigrateModeSafe)) + require.NoError(t, p.Migrate(ctx, schema.Tables{table}, MigrateModeSafe)) }) } diff --git a/plugin/testing_write_overwrite.go b/plugin/testing_write_overwrite.go index 12c8400053..34e89e8b2d 100644 --- a/plugin/testing_write_overwrite.go +++ b/plugin/testing_write_overwrite.go @@ -6,32 +6,27 @@ import ( "time" "github.com/apache/arrow/go/v13/arrow/array" - pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/cloudquery/plugin-sdk/v4/types" "github.com/google/uuid" "github.com/rs/zerolog" ) -func (*PluginTestSuite) destinationPluginTestWriteOverwrite(ctx context.Context, p *Plugin, logger zerolog.Logger, spec pbPlugin.Spec, testOpts PluginTestSuiteRunnerOptions) error { - spec.WriteSpec.WriteMode = pbPlugin.WRITE_MODE_WRITE_MODE_OVERWRITE +func (*PluginTestSuite) destinationPluginTestWriteOverwrite(ctx context.Context, p *Plugin, logger zerolog.Logger, spec any, testOpts PluginTestSuiteRunnerOptions) error { if err := p.Init(ctx, spec); err != nil { return fmt.Errorf("failed to init plugin: %w", err) } - tableName := fmt.Sprintf("cq_%s_%d", spec.Name, time.Now().Unix()) + tableName := fmt.Sprintf("cq_test_write_overwrite_%d", time.Now().Unix()) table := schema.TestTable(tableName, testOpts.TestSourceOptions) syncTime := time.Now().UTC().Round(1 * time.Second) tables := schema.Tables{ table, } - if err := p.Migrate(ctx, tables); err != nil { + if err := p.Migrate(ctx, tables, MigrateModeSafe); err != nil { return fmt.Errorf("failed to migrate tables: %w", err) } sourceName := "testOverwriteSource" + uuid.NewString() - sourceSpec := pbPlugin.Spec{ - Name: sourceName, - } opts := schema.GenTestDataOptions{ SourceName: sourceName, @@ -40,7 +35,7 @@ func (*PluginTestSuite) destinationPluginTestWriteOverwrite(ctx context.Context, TimePrecision: testOpts.TimePrecision, } resources := schema.GenTestData(table, opts) - if err := p.writeAll(ctx, sourceSpec, syncTime, resources); err != nil { + if err := p.writeAll(ctx, sourceName, syncTime, WriteModeOverwrite, resources); err != nil { return fmt.Errorf("failed to write all: %w", err) } sortRecordsBySyncTime(table, resources) @@ -82,7 +77,7 @@ func (*PluginTestSuite) destinationPluginTestWriteOverwrite(ctx context.Context, } updatedResource := schema.GenTestData(table, opts) // write second time - if err := p.writeAll(ctx, sourceSpec, secondSyncTime, updatedResource); err != nil { + if err := p.writeAll(ctx, sourceName, secondSyncTime, WriteModeOverwrite, updatedResource); err != nil { return fmt.Errorf("failed to write one second time: %w", err) } diff --git a/serve/plugin.go b/serve/plugin.go index 0adddb4b9c..f4dbfe9e5b 100644 --- a/serve/plugin.go +++ b/serve/plugin.go @@ -33,19 +33,19 @@ import ( "google.golang.org/grpc/test/bufconn" ) -type pluginServe struct { +type PluginServe struct { plugin *plugin.Plugin - args []string + args []string destinationV0V1Server bool sentryDSN string - testListener bool - testListenerConn *bufconn.Listener + testListener bool + testListenerConn *bufconn.Listener } -type PluginOption func(*pluginServe) +type PluginOption func(*PluginServe) func WithPluginSentryDSN(dsn string) PluginOption { - return func(s *pluginServe) { + return func(s *PluginServe) { s.sentryDSN = dsn } } @@ -53,14 +53,14 @@ func WithPluginSentryDSN(dsn string) PluginOption { // WithDestinationV0V1Server is used to include destination v0 and v1 server to work // with older sources func WithDestinationV0V1Server() PluginOption { - return func(s *pluginServe) { + return func(s *PluginServe) { s.destinationV0V1Server = true } } // WithArgs used to serve the plugin with predefined args instead of os.Args func WithArgs(args ...string) PluginOption { - return func(s *pluginServe) { + return func(s *PluginServe) { s.args = args } } @@ -68,7 +68,7 @@ func WithArgs(args ...string) PluginOption { // WithTestListener means that the plugin will be served with an in-memory listener // available via testListener() method instead of a network listener. func WithTestListener() PluginOption { - return func(s *pluginServe) { + return func(s *PluginServe) { s.testListener = true s.testListenerConn = bufconn.Listen(testBufSize) } @@ -76,9 +76,9 @@ func WithTestListener() PluginOption { const servePluginShort = `Start plugin server` -func Plugin(plugin *plugin.Plugin, opts ...PluginOption) *pluginServe{ - s := &pluginServe{ - plugin: plugin, +func Plugin(p *plugin.Plugin, opts ...PluginOption) *PluginServe { + s := &PluginServe{ + plugin: p, } for _, opt := range opts { opt(s) @@ -86,12 +86,14 @@ func Plugin(plugin *plugin.Plugin, opts ...PluginOption) *pluginServe{ return s } -func (s *pluginServe) bufPluginDialer(context.Context, string) (net.Conn, error) { +func (s *PluginServe) bufPluginDialer(context.Context, string) (net.Conn, error) { return s.testListenerConn.Dial() } -func (s *pluginServe) Serve(ctx context.Context) error { - types.RegisterAllExtensions() +func (s *PluginServe) Serve(ctx context.Context) error { + if err := types.RegisterAllExtensions(); err != nil { + return err + } cmd := s.newCmdPluginRoot() if s.args != nil { cmd.SetArgs(s.args) @@ -99,7 +101,7 @@ func (s *pluginServe) Serve(ctx context.Context) error { return cmd.ExecuteContext(ctx) } -func (serve *pluginServe) newCmdPluginServe() *cobra.Command { +func (s *PluginServe) newCmdPluginServe() *cobra.Command { var address string var network string var noSentry bool @@ -131,8 +133,8 @@ func (serve *pluginServe) newCmdPluginServe() *cobra.Command { // opts.Plugin.Logger = logger var listener net.Listener - if serve.testListener { - listener = serve.testListenerConn + if s.testListener { + listener = s.testListenerConn } else { listener, err = net.Listen(network, address) if err != nil { @@ -143,7 +145,7 @@ func (serve *pluginServe) newCmdPluginServe() *cobra.Command { // unlike destination plugins that can accept multiple connections limitListener := netutil.LimitListener(listener, 1) // See logging pattern https://github.com/grpc-ecosystem/go-grpc-middleware/blob/v2/providers/zerolog/examples_test.go - s := grpc.NewServer( + grpcServer := grpc.NewServer( grpc.ChainUnaryInterceptor( logging.UnaryServerInterceptor(grpczerolog.InterceptorLogger(logger)), ), @@ -153,30 +155,30 @@ func (serve *pluginServe) newCmdPluginServe() *cobra.Command { grpc.MaxRecvMsgSize(MaxMsgSize), grpc.MaxSendMsgSize(MaxMsgSize), ) - serve.plugin.SetLogger(logger) - pbv3.RegisterPluginServer(s, &serversv3.Server{ - Plugin: serve.plugin, + s.plugin.SetLogger(logger) + pbv3.RegisterPluginServer(grpcServer, &serversv3.Server{ + Plugin: s.plugin, Logger: logger, }) - if serve.destinationV0V1Server { - pbDestinationV1.RegisterDestinationServer(s, &serverDestinationV1.Server{ - Plugin: serve.plugin, + if s.destinationV0V1Server { + pbDestinationV1.RegisterDestinationServer(grpcServer, &serverDestinationV1.Server{ + Plugin: s.plugin, Logger: logger, }) - pbDestinationV0.RegisterDestinationServer(s, &serverDestinationV0.Server{ - Plugin: serve.plugin, + pbDestinationV0.RegisterDestinationServer(grpcServer, &serverDestinationV0.Server{ + Plugin: s.plugin, Logger: logger, }) } - pbdiscoveryv0.RegisterDiscoveryServer(s, &discoveryServerV0.Server{ + pbdiscoveryv0.RegisterDiscoveryServer(grpcServer, &discoveryServerV0.Server{ Versions: []string{"v0", "v1", "v2", "v3"}, }) - version := serve.plugin.Version() + version := s.plugin.Version() - if serve.sentryDSN != "" && !strings.EqualFold(version, "development") && !noSentry { + if s.sentryDSN != "" && !strings.EqualFold(version, "development") && !noSentry { err = sentry.Init(sentry.ClientOptions{ - Dsn: serve.sentryDSN, + Dsn: s.sentryDSN, Debug: false, AttachStacktrace: false, Release: version, @@ -210,15 +212,15 @@ func (serve *pluginServe) newCmdPluginServe() *cobra.Command { select { case sig := <-c: logger.Info().Str("address", listener.Addr().String()).Str("signal", sig.String()).Msg("Got stop signal. Source plugin server shutting down") - s.Stop() + grpcServer.Stop() case <-ctx.Done(): logger.Info().Str("address", listener.Addr().String()).Msg("Context cancelled. Source plugin server shutting down") - s.Stop() + grpcServer.Stop() } }() logger.Info().Str("address", listener.Addr().String()).Msg("Source plugin server listening") - if err := s.Serve(limitListener); err != nil { + if err := grpcServer.Serve(limitListener); err != nil { return fmt.Errorf("failed to serve: %w", err) } return nil @@ -251,7 +253,7 @@ doc --format json . ` ) -func (serve *pluginServe) newCmdPluginDoc() *cobra.Command { +func (s *PluginServe) newCmdPluginDoc() *cobra.Command { format := newEnum([]string{"json", "markdown"}, "markdown") cmd := &cobra.Command{ Use: "doc ", @@ -260,20 +262,20 @@ func (serve *pluginServe) newCmdPluginDoc() *cobra.Command { Args: cobra.ExactArgs(1), RunE: func(cmd *cobra.Command, args []string) error { pbFormat := pbv3.GenDocs_FORMAT(pbv3.GenDocs_FORMAT_value[format.Value]) - return serve.plugin.GeneratePluginDocs(args[0], pbFormat) + return s.plugin.GeneratePluginDocs(args[0], pbFormat) }, } cmd.Flags().Var(format, "format", fmt.Sprintf("output format. one of: %s", strings.Join(format.Allowed, ","))) return cmd } -func (serve *pluginServe) newCmdPluginRoot() *cobra.Command { +func (s *PluginServe) newCmdPluginRoot() *cobra.Command { cmd := &cobra.Command{ - Use: fmt.Sprintf("%s ", serve.plugin.Name()), + Use: fmt.Sprintf("%s ", s.plugin.Name()), } - cmd.AddCommand(serve.newCmdPluginServe()) - cmd.AddCommand(serve.newCmdPluginDoc()) + cmd.AddCommand(s.newCmdPluginServe()) + cmd.AddCommand(s.newCmdPluginDoc()) cmd.CompletionOptions.DisableDefaultCmd = true - cmd.Version = serve.plugin.Version() + cmd.Version = s.plugin.Version() return cmd } diff --git a/serve/plugin_test.go b/serve/plugin_test.go index 16f719abc8..9135012f0c 100644 --- a/serve/plugin_test.go +++ b/serve/plugin_test.go @@ -58,11 +58,11 @@ func (*testExecutionClient) Close(ctx context.Context) error { return nil } -func newTestExecutionClient(context.Context, zerolog.Logger, pb.Spec) (plugin.Client, error) { +func newTestExecutionClient(context.Context, zerolog.Logger, any) (plugin.Client, error) { return &testExecutionClient{}, nil } -func TestSourceSuccess(t *testing.T) { +func TestPlugin(t *testing.T) { p := plugin.NewPlugin( "testPlugin", "v1.0.0", @@ -106,16 +106,6 @@ func TestSourceSuccess(t *testing.T) { t.Fatalf("Expected version to be v1.0.0 but got %s", getVersionResponse.Version) } - spec := pb.Spec{ - Name: "testSourcePlugin", - Version: "v1.0.0", - Path: "cloudquery/testSourcePlugin", - SyncSpec: &pb.SyncSpec{ - Tables: []string{"test_table"}, - Destinations: []string{"test"}, - }, - } - getTablesRes, err := c.GetStaticTables(ctx, &pb.GetStaticTables_Request{}) if err != nil { t.Fatal(err) @@ -129,7 +119,7 @@ func TestSourceSuccess(t *testing.T) { if len(tables) != 2 { t.Fatalf("Expected 2 tables but got %d", len(tables)) } - if _, err := c.Init(ctx, &pb.Init_Request{Spec: &spec}); err != nil { + if _, err := c.Init(ctx, &pb.Init_Request{}); err != nil { t.Fatal(err) } From 50bd8f1c01a89369bd62dd8d2f83fd77effca9a7 Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Fri, 2 Jun 2023 18:08:33 +0300 Subject: [PATCH 007/125] wip --- go.mod | 14 +- go.sum | 38 ++++- internal/plugins/local/local.go | 154 ++++++++++++++++++ .../{backends => plugins}/local/local_test.go | 2 +- internal/state/state.go | 27 +++ plugin/plugin_managed_source_test.go | 32 ++-- serve/destination_v0_test.go | 2 +- serve/destination_v1_test.go | 2 +- 8 files changed, 241 insertions(+), 30 deletions(-) create mode 100644 internal/plugins/local/local.go rename internal/{backends => plugins}/local/local_test.go (98%) create mode 100644 internal/state/state.go diff --git a/go.mod b/go.mod index 082be87eb7..de7db51fe8 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,7 @@ module github.com/cloudquery/plugin-sdk/v4 go 1.19 require ( - github.com/apache/arrow/go/v13 v13.0.0-20230525142029-2d32efeedad8 + github.com/apache/arrow/go/v13 v13.0.0-20230601214540-018e7d3f9c4b github.com/bradleyjkemp/cupaloy/v2 v2.8.0 github.com/cloudquery/plugin-pb-go v1.1.0 github.com/cloudquery/plugin-sdk/v2 v2.7.0 @@ -14,7 +14,6 @@ require ( github.com/grpc-ecosystem/go-grpc-middleware/providers/zerolog/v2 v2.0.0-rc.3 github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.0.0-rc.3 github.com/rs/zerolog v1.29.1 - github.com/spf13/cast v1.5.0 github.com/spf13/cobra v1.6.1 github.com/stretchr/testify v1.8.4 github.com/thoas/go-funk v0.9.3 @@ -33,7 +32,7 @@ replace github.com/cloudquery/plugin-pb-go => ../plugin-pb-go require ( github.com/andybalholm/brotli v1.0.5 // indirect github.com/apache/thrift v0.16.0 // indirect - github.com/cloudquery/plugin-sdk/v3 v3.7.0 // indirect + github.com/avast/retry-go/v4 v4.3.4 // indirect github.com/davecgh/go-spew v1.1.1 // indirect github.com/ghodss/yaml v1.0.0 // indirect github.com/golang/protobuf v1.5.3 // indirect @@ -43,12 +42,19 @@ require ( github.com/klauspost/cpuid/v2 v2.2.3 // indirect github.com/mattn/go-colorable v0.1.13 // indirect github.com/mattn/go-isatty v0.0.18 // indirect - github.com/pierrec/lz4/v4 v4.1.17 // indirect + github.com/mattn/go-runewidth v0.0.14 // indirect + github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 // indirect + github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3 // indirect + github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect + github.com/pierrec/lz4/v4 v4.1.15 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect + github.com/rivo/uniseg v0.2.0 // indirect + github.com/schollz/progressbar/v3 v3.13.1 // indirect github.com/spf13/pflag v1.0.5 // indirect github.com/zeebo/xxh3 v1.0.2 // indirect golang.org/x/mod v0.8.0 // indirect golang.org/x/sys v0.7.0 // indirect + golang.org/x/term v0.7.0 // indirect golang.org/x/tools v0.6.0 // indirect golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20230530153820-e85fd2cbaebc // indirect diff --git a/go.sum b/go.sum index 17a7a98de3..0ebdc98a2f 100644 --- a/go.sum +++ b/go.sum @@ -33,6 +33,13 @@ cloud.google.com/go/storage v1.10.0/go.mod h1:FLPqc6j+Ki4BU591ie1oL6qBQGu2Bl/tZ9 dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= +github.com/JohnCGriffin/overflow v0.0.0-20211019200055-46fa312c352c h1:RGWPOewvKIROun94nF7v2cua9qP+thov/7M50KEoeSU= +github.com/andybalholm/brotli v1.0.5 h1:8uQZIdzKmjc/iuPu7O2ioW48L81FgatrcpfFmiq/cCs= +github.com/andybalholm/brotli v1.0.5/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig= +github.com/apache/thrift v0.16.0 h1:qEy6UW60iVOlUy+b9ZR0d5WzUWYGOo4HfopoyBaNmoY= +github.com/apache/thrift v0.16.0/go.mod h1:PHK3hniurgQaNMZYaCLEqXKsYK8upmhPbmdP2FXSqgU= +github.com/avast/retry-go/v4 v4.3.4 h1:pHLkL7jvCvP317I8Ge+Km2Yhntv3SdkJm7uekkqbKhM= +github.com/avast/retry-go/v4 v4.3.4/go.mod h1:rv+Nla6Vk3/ilU0H51VHddWHiwimzX66yZ0JT6T+UvE= github.com/bradleyjkemp/cupaloy/v2 v2.8.0 h1:any4BmKE+jGIaMpnU8YgH/I2LPiLBufr6oMMlVBbn9M= github.com/bradleyjkemp/cupaloy/v2 v2.8.0/go.mod h1:bm7JXdkRd4BHJk9HpwqAI8BoAY1lps46Enkdqw6aRX0= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= @@ -44,8 +51,6 @@ github.com/cloudquery/arrow/go/v13 v13.0.0-20230509053643-898a79b1d3c8 h1:CmgLSE github.com/cloudquery/arrow/go/v13 v13.0.0-20230509053643-898a79b1d3c8/go.mod h1:/XatdE3kDIBqZKhZ7OBUHwP2jaASDFZHqF4puOWM8po= github.com/cloudquery/plugin-sdk/v2 v2.7.0 h1:hRXsdEiaOxJtsn/wZMFQC9/jPfU1MeMK3KF+gPGqm7U= github.com/cloudquery/plugin-sdk/v2 v2.7.0/go.mod h1:pAX6ojIW99b/Vg4CkhnsGkRIzNaVEceYMR+Bdit73ug= -github.com/cloudquery/plugin-sdk/v3 v3.7.0 h1:aRazh17V+6AA00vmxPZRv2rudNEerSd3kqbyffRl6SA= -github.com/cloudquery/plugin-sdk/v3 v3.7.0/go.mod h1:z9Fny7SO8fNyVx6bOTM037lo7h3vJI+ZHUc/RMj20VU= github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= github.com/cncf/udpa/go v0.0.0-20201120205902-5459f2c99403/go.mod h1:WmhPx2Nbnhtbo57+VJT5O0JRkEi1Wbu0z5j0R8u5Hbk= github.com/coreos/go-systemd v0.0.0-20190321100706-95778dfbb74e/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= @@ -59,7 +64,6 @@ github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.m github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= github.com/envoyproxy/go-control-plane v0.9.9-0.20210217033140-668b12f5399d/go.mod h1:cXg6YxExXjJnVBQHBLXeUAgxn2UodCpnH306RInaBQk= github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= -github.com/frankban/quicktest v1.14.3 h1:FJKSZTDHjyhriyC81FLQ0LY93eSai0ZyR/ZIkd3ZUKE= github.com/getsentry/sentry-go v0.20.0 h1:bwXW98iMRIWxn+4FgPW7vMrjmbym6HblXALmhjHmQaQ= github.com/getsentry/sentry-go v0.20.0/go.mod h1:lc76E2QywIyW8WuBnwl8Lc4bkmQH4+w1gwTf25trprY= github.com/ghodss/yaml v1.0.0 h1:wQHKEahhL6wmXdzwWG11gIVCkOv05bNOh+Rxn0yngAk= @@ -142,6 +146,7 @@ github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2 github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU= github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk= +github.com/k0kubun/go-ansi v0.0.0-20180517002512-3bf9e2903213/go.mod h1:vNUNkEQ1e29fT/6vq2aBdFsgNPmy8qMdSay1npru+Sw= github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/klauspost/compress v1.16.0 h1:iULayQNOReoYUe+1qtKOqw9CwJv3aNQu8ivo7lw1HU4= @@ -158,8 +163,20 @@ github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxec github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94= github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= +github.com/mattn/go-isatty v0.0.17/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= github.com/mattn/go-isatty v0.0.18 h1:DOKFKCQ7FNG2L1rbrmstDN4QVRdS89Nkh85u68Uwp98= github.com/mattn/go-isatty v0.0.18/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= +<<<<<<< HEAD +======= +github.com/mattn/go-runewidth v0.0.14 h1:+xnbZSEeDbOIg5/mE6JF0w6n9duR1l3/WmbinWVwUuU= +github.com/mattn/go-runewidth v0.0.14/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= +github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 h1:AMFGa4R4MiIpspGNG7Z948v4n35fFGB3RR3G/ry4FWs= +github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8/go.mod h1:mC1jAcsrzbxHt8iiaC+zU4b1ylILSosueou12R++wfY= +github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3 h1:+n/aFZefKZp7spd8DFdX7uMikMLXX4oubIzJF4kv/wI= +github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3/go.mod h1:RagcQ7I8IeTMnF8JTXieKnO4Z6JCsikNEzj0DwauVzE= +github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db h1:62I3jR2EmQ4l5rM/4FEfDWcRD+abF5XlKShorW5LRoQ= +github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db/go.mod h1:l0dey0ia/Uv7NcFFVbCLtqEBQbrT4OCwCSKTEv6enCw= +>>>>>>> 446b805 (wip) github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o= github.com/pierrec/lz4/v4 v4.1.17 h1:kV4Ip+/hUBC+8T6+2EgburRtkE9ef4nbY3f4dFhGjMc= github.com/pierrec/lz4/v4 v4.1.17/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= @@ -170,6 +187,8 @@ github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINE github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= +github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY= +github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8= github.com/rs/xid v1.2.1/go.mod h1:+uKXf+4Djp6Md1KODXJxgGQPKngRmWyn10oCKFzNHOQ= @@ -178,14 +197,20 @@ github.com/rs/zerolog v1.19.0/go.mod h1:IzD0RJ65iWH0w97OQQebJEvTZYvsCUm9WVLWBQrJ github.com/rs/zerolog v1.29.1 h1:cO+d60CHkknCbvzEWxP0S9K6KqyTjrCNUy1LdQLCGPc= github.com/rs/zerolog v1.29.1/go.mod h1:Le6ESbR7hc+DP6Lt1THiV8CQSdkkNrd3R0XbEgp3ZBU= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= -github.com/spf13/cast v1.5.0 h1:rj3WzYc11XZaIZMPKmwP96zkFEnnAmV8s6XbB2aY32w= -github.com/spf13/cast v1.5.0/go.mod h1:SpXXQ5YoyJw6s3/6cMTQuxvgRl3PCJiyaX9p6b155UU= +github.com/schollz/progressbar/v3 v3.13.1 h1:o8rySDYiQ59Mwzy2FELeHY5ZARXZTVJC7iHD6PEFUiE= +github.com/schollz/progressbar/v3 v3.13.1/go.mod h1:xvrbki8kfT1fzWzBT/UZd9L6GA+jdL7HAgq2RFnO6fQ= github.com/spf13/cobra v1.6.1 h1:o94oiPyS4KD1mPy2fmcYYHHfCxLqYjJOhGsCHFZtEzA= github.com/spf13/cobra v1.6.1/go.mod h1:IOw/AERYS7UzyrGinqmz6HLUo219MORXGxhbaJUqzrY= github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +<<<<<<< HEAD +======= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +>>>>>>> 446b805 (wip) github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= @@ -324,6 +349,9 @@ golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.7.0 h1:3jlCCIQZPdOYu1h8BkNvLz8Kgwtae2cagcG/VamtZRU= golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.6.0/go.mod h1:m6U89DPEgQRMq3DNkDClhWw02AUbt2daBVO4cn4Hv9U= +golang.org/x/term v0.7.0 h1:BEvjmm5fURWqcfbSKTdpkDXYBrUS1c0m8agp14W48vQ= +golang.org/x/term v0.7.0/go.mod h1:P32HKFT3hSsZrRxla30E9HqToFYAQPCMs/zFMBUFqPY= golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= diff --git a/internal/plugins/local/local.go b/internal/plugins/local/local.go new file mode 100644 index 0000000000..1780f67162 --- /dev/null +++ b/internal/plugins/local/local.go @@ -0,0 +1,154 @@ +package local + +import ( + "context" + "encoding/json" + "fmt" + "io" + "os" + "path" + "strings" + "sync" + + "github.com/rs/zerolog" +) + +type LocalSpec struct { + Path string `json:"path"` +} + +type Local struct { + sourceName string + spec *LocalSpec + tables map[string]entries // table -> key -> value + tablesLock sync.RWMutex +} + +type entries map[string]string + + +func New(_ context.Context, _ zerolog.Logger, spec any) (*Local, error) { + l := &Local{ + spec: spec.(*LocalSpec), + } + tables, err := l.loadPreviousState() + if err != nil { + return nil, err + } + if tables == nil { + tables = map[string]entries{} + } + l.tables = tables + return l, nil +} + +func (l *Local) loadPreviousState() (map[string]entries, error) { + files, err := os.ReadDir(l.spec.Path) + if os.IsNotExist(err) { + return nil, nil + } + var tables = map[string]entries{} + for _, f := range files { + if f.IsDir() || !f.Type().IsRegular() { + continue + } + name := f.Name() + if !strings.HasSuffix(name, ".json") || !strings.HasPrefix(name, l.sourceName+"-") { + continue + } + table, kv, err := l.readFile(name) + if err != nil { + return nil, err + } + tables[table] = kv + } + return tables, nil +} + +func (l *Local) readFile(name string) (table string, kv entries, err error) { + p := path.Join(l.spec.Path, name) + f, err := os.Open(p) + if err != nil { + return "", nil, fmt.Errorf("failed to open state file: %w", err) + } + b, err := io.ReadAll(f) + if err != nil { + return "", nil, fmt.Errorf("failed to read state file: %w", err) + } + err = f.Close() + if err != nil { + return "", nil, fmt.Errorf("failed to close state file: %w", err) + } + err = json.Unmarshal(b, &kv) + if err != nil { + return "", nil, fmt.Errorf("failed to unmarshal state file: %w", err) + } + table = strings.TrimPrefix(strings.TrimSuffix(name, ".json"), l.sourceName+"-") + return table, kv, nil +} + +func (l *Local) Get(_ context.Context, table, clientID string) (string, error) { + l.tablesLock.RLock() + defer l.tablesLock.RUnlock() + + if _, ok := l.tables[table]; !ok { + return "", nil + } + return l.tables[table][clientID], nil +} + +func (l *Local) Set(_ context.Context, table, clientID, value string) error { + l.tablesLock.Lock() + defer l.tablesLock.Unlock() + + if _, ok := l.tables[table]; !ok { + l.tables[table] = map[string]string{} + } + prev := l.tables[table][clientID] + l.tables[table][clientID] = value + if prev != value { + // only flush if the value changed + return l.flushTable(table, l.tables[table]) + } + return nil +} + +func (l *Local) Close(_ context.Context) error { + l.tablesLock.RLock() + defer l.tablesLock.RUnlock() + + return l.flush() +} + +func (l *Local) flush() error { + for table, kv := range l.tables { + err := l.flushTable(table, kv) + if err != nil { + return err + } + } + return nil +} + +func (l *Local) flushTable(table string, entries entries) error { + if len(entries) == 0 { + return nil + } + + err := os.MkdirAll(l.spec.Path, 0755) + if err != nil { + return fmt.Errorf("failed to create state directory %v: %w", l.spec.Path, err) + } + + b, err := json.MarshalIndent(entries, "", " ") + if err != nil { + return fmt.Errorf("failed to marshal state for table %v: %w", table, err) + } + f := path.Join(l.spec.Path, l.sourceName+"-"+table+".json") + err = os.WriteFile(f, b, 0644) + if err != nil { + return fmt.Errorf("failed to write state for table %v: %w", table, err) + } + + return nil +} diff --git a/internal/backends/local/local_test.go b/internal/plugins/local/local_test.go similarity index 98% rename from internal/backends/local/local_test.go rename to internal/plugins/local/local_test.go index 4e3423f9d8..58fb073ed3 100644 --- a/internal/backends/local/local_test.go +++ b/internal/plugins/local/local_test.go @@ -4,7 +4,7 @@ import ( "context" "testing" - "github.com/cloudquery/plugin-pb-go/specs" + "github.com/cloudquery/plugin-pb-go/specs/v0" ) func TestLocal(t *testing.T) { diff --git a/internal/state/state.go b/internal/state/state.go new file mode 100644 index 0000000000..1dc00b79f9 --- /dev/null +++ b/internal/state/state.go @@ -0,0 +1,27 @@ +package state + +import "context" + +type Client struct { + // managedPlugin managedplugin.Client +} + +// func NewState(ctx context.Context, managedPlugin managedplugin.Client) *Client { +// return &Client{ +// managedPlugin: managedPlugin, +// } +// c := pbPlugin.NewPluginClient(managedPlugin.Conn) +// c.Write(ctx, ) +// } + +func NewState(spec any) *Client { + return &Client{} +} + +func (* Client) SetKey(ctx context.Context, key string, value string) error { + return nil +} + +func (* Client) GetKey(ctx context.Context, key string) (string, error) { + return "", nil +} \ No newline at end of file diff --git a/plugin/plugin_managed_source_test.go b/plugin/plugin_managed_source_test.go index 3c55c7d4ae..8520c3a7be 100644 --- a/plugin/plugin_managed_source_test.go +++ b/plugin/plugin_managed_source_test.go @@ -9,6 +9,7 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/array" pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" + "github.com/cloudquery/plugin-sdk/v4/plugin" "github.com/cloudquery/plugin-sdk/v4/scalar" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/google/go-cmp/cmp" @@ -358,18 +359,18 @@ func (testRand) Read(p []byte) (n int, err error) { func TestManagedSync(t *testing.T) { uuid.SetRand(testRand{}) - for _, scheduler := range pbPlugin.SyncSpec_SCHEDULER_value { + for _, scheduler := range plugin.AllSchedulers { for _, tc := range syncTestCases { tc := tc tc.table = tc.table.Copy(nil) - t.Run(tc.table.Name+"_"+pbPlugin.SyncSpec_SCHEDULER(scheduler).String(), func(t *testing.T) { - testSyncTable(t, tc, pbPlugin.SyncSpec_SCHEDULER(scheduler), tc.deterministicCQID) + t.Run(tc.table.Name+"_"+scheduler.String(), func(t *testing.T) { + testSyncTable(t, tc, scheduler, tc.deterministicCQID) }) } } } -func testSyncTable(t *testing.T, tc syncTestCase, scheduler pbPlugin.SyncSpec_SCHEDULER, deterministicCQID bool) { +func testSyncTable(t *testing.T, tc syncTestCase, scheduler plugin.Scheduler, deterministicCQID bool) { ctx := context.Background() tables := []*schema.Table{ tc.table, @@ -382,23 +383,18 @@ func testSyncTable(t *testing.T, tc syncTestCase, scheduler pbPlugin.SyncSpec_SC WithStaticTables(tables), ) plugin.SetLogger(zerolog.New(zerolog.NewTestWriter(t))) - spec := pbPlugin.Spec{ - Name: "testSource", - Path: "cloudquery/testSource", - Version: "v1.0.0", - SyncSpec: &pbPlugin.SyncSpec{ - Tables: []string{"*"}, - Destinations: []string{"test"}, - Concurrency: 1, // choose a very low value to check that we don't run into deadlocks - Scheduler: scheduler, - DetrministicCqId: deterministicCQID, - }, - } - if err := plugin.Init(ctx, spec); err != nil { + sourceName := "testSource" + + if err := plugin.Init(ctx, nil); err != nil { t.Fatal(err) } - records, err := plugin.syncAll(ctx, testSyncTime, *spec.SyncSpec) + records, err := plugin.syncAll(ctx, sourceName, testSyncTime, SyncOptions{ + Tables: []string{"*"}, + Concurrency: 1, + Scheduler: scheduler, + DeterministicCQID: deterministicCQID, + }) if err != nil { t.Fatal(err) } diff --git a/serve/destination_v0_test.go b/serve/destination_v0_test.go index 6a206e53ad..a1ba81f794 100644 --- a/serve/destination_v0_test.go +++ b/serve/destination_v0_test.go @@ -12,7 +12,7 @@ import ( "github.com/apache/arrow/go/v13/arrow/memory" pbBase "github.com/cloudquery/plugin-pb-go/pb/base/v0" pb "github.com/cloudquery/plugin-pb-go/pb/destination/v0" - "github.com/cloudquery/plugin-pb-go/specs" + "github.com/cloudquery/plugin-pb-go/specs/v0" schemav2 "github.com/cloudquery/plugin-sdk/v2/schema" "github.com/cloudquery/plugin-sdk/v2/testdata" "github.com/cloudquery/plugin-sdk/v4/internal/deprecated" diff --git a/serve/destination_v1_test.go b/serve/destination_v1_test.go index 577fd8f9ff..c65320eea1 100644 --- a/serve/destination_v1_test.go +++ b/serve/destination_v1_test.go @@ -12,7 +12,7 @@ import ( "github.com/apache/arrow/go/v13/arrow/array" "github.com/apache/arrow/go/v13/arrow/ipc" pb "github.com/cloudquery/plugin-pb-go/pb/destination/v1" - "github.com/cloudquery/plugin-pb-go/specs" + "github.com/cloudquery/plugin-pb-go/specs/v0" "github.com/cloudquery/plugin-sdk/v4/plugin" "github.com/cloudquery/plugin-sdk/v4/schema" "google.golang.org/grpc" From 87ff461cfa7c062b03034a181c27fb156f0b2d2b Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Sun, 4 Jun 2023 11:34:00 +0300 Subject: [PATCH 008/125] more wip --- internal/plugins/local/local.go | 154 ------------------- internal/plugins/local/local_test.go | 101 ------------- internal/servers/discovery/v1/discovery.go | 16 ++ internal/servers/plugin/v3/plugin.go | 31 +++- internal/servers/plugin/v3/state.go | 165 +++++++++++++++++++++ internal/state/state.go | 27 ---- plugin/memdb.go | 6 +- plugin/options.go | 27 ++++ plugin/plugin.go | 16 +- plugin/plugin_managed_sync.go | 45 ++++++ plugin/plugin_reader.go | 41 ++--- plugin/plugin_writer.go | 3 + plugin/scheduler_dfs.go | 2 +- plugin/scheduler_round_robin.go | 2 +- plugin/state.go | 1 + serve/plugin.go | 6 + serve/plugin_test.go | 4 + serve/state_v3_test.go | 57 +++++++ state/state.go | 8 + 19 files changed, 393 insertions(+), 319 deletions(-) delete mode 100644 internal/plugins/local/local.go delete mode 100644 internal/plugins/local/local_test.go create mode 100644 internal/servers/discovery/v1/discovery.go create mode 100644 internal/servers/plugin/v3/state.go delete mode 100644 internal/state/state.go create mode 100644 plugin/plugin_managed_sync.go create mode 100644 plugin/state.go create mode 100644 serve/state_v3_test.go create mode 100644 state/state.go diff --git a/internal/plugins/local/local.go b/internal/plugins/local/local.go deleted file mode 100644 index 1780f67162..0000000000 --- a/internal/plugins/local/local.go +++ /dev/null @@ -1,154 +0,0 @@ -package local - -import ( - "context" - "encoding/json" - "fmt" - "io" - "os" - "path" - "strings" - "sync" - - "github.com/rs/zerolog" -) - -type LocalSpec struct { - Path string `json:"path"` -} - -type Local struct { - sourceName string - spec *LocalSpec - tables map[string]entries // table -> key -> value - tablesLock sync.RWMutex -} - -type entries map[string]string - - -func New(_ context.Context, _ zerolog.Logger, spec any) (*Local, error) { - l := &Local{ - spec: spec.(*LocalSpec), - } - tables, err := l.loadPreviousState() - if err != nil { - return nil, err - } - if tables == nil { - tables = map[string]entries{} - } - l.tables = tables - return l, nil -} - -func (l *Local) loadPreviousState() (map[string]entries, error) { - files, err := os.ReadDir(l.spec.Path) - if os.IsNotExist(err) { - return nil, nil - } - var tables = map[string]entries{} - for _, f := range files { - if f.IsDir() || !f.Type().IsRegular() { - continue - } - name := f.Name() - if !strings.HasSuffix(name, ".json") || !strings.HasPrefix(name, l.sourceName+"-") { - continue - } - table, kv, err := l.readFile(name) - if err != nil { - return nil, err - } - tables[table] = kv - } - return tables, nil -} - -func (l *Local) readFile(name string) (table string, kv entries, err error) { - p := path.Join(l.spec.Path, name) - f, err := os.Open(p) - if err != nil { - return "", nil, fmt.Errorf("failed to open state file: %w", err) - } - b, err := io.ReadAll(f) - if err != nil { - return "", nil, fmt.Errorf("failed to read state file: %w", err) - } - err = f.Close() - if err != nil { - return "", nil, fmt.Errorf("failed to close state file: %w", err) - } - err = json.Unmarshal(b, &kv) - if err != nil { - return "", nil, fmt.Errorf("failed to unmarshal state file: %w", err) - } - table = strings.TrimPrefix(strings.TrimSuffix(name, ".json"), l.sourceName+"-") - return table, kv, nil -} - -func (l *Local) Get(_ context.Context, table, clientID string) (string, error) { - l.tablesLock.RLock() - defer l.tablesLock.RUnlock() - - if _, ok := l.tables[table]; !ok { - return "", nil - } - return l.tables[table][clientID], nil -} - -func (l *Local) Set(_ context.Context, table, clientID, value string) error { - l.tablesLock.Lock() - defer l.tablesLock.Unlock() - - if _, ok := l.tables[table]; !ok { - l.tables[table] = map[string]string{} - } - prev := l.tables[table][clientID] - l.tables[table][clientID] = value - if prev != value { - // only flush if the value changed - return l.flushTable(table, l.tables[table]) - } - return nil -} - -func (l *Local) Close(_ context.Context) error { - l.tablesLock.RLock() - defer l.tablesLock.RUnlock() - - return l.flush() -} - -func (l *Local) flush() error { - for table, kv := range l.tables { - err := l.flushTable(table, kv) - if err != nil { - return err - } - } - return nil -} - -func (l *Local) flushTable(table string, entries entries) error { - if len(entries) == 0 { - return nil - } - - err := os.MkdirAll(l.spec.Path, 0755) - if err != nil { - return fmt.Errorf("failed to create state directory %v: %w", l.spec.Path, err) - } - - b, err := json.MarshalIndent(entries, "", " ") - if err != nil { - return fmt.Errorf("failed to marshal state for table %v: %w", table, err) - } - f := path.Join(l.spec.Path, l.sourceName+"-"+table+".json") - err = os.WriteFile(f, b, 0644) - if err != nil { - return fmt.Errorf("failed to write state for table %v: %w", table, err) - } - - return nil -} diff --git a/internal/plugins/local/local_test.go b/internal/plugins/local/local_test.go deleted file mode 100644 index 58fb073ed3..0000000000 --- a/internal/plugins/local/local_test.go +++ /dev/null @@ -1,101 +0,0 @@ -package local - -import ( - "context" - "testing" - - "github.com/cloudquery/plugin-pb-go/specs/v0" -) - -func TestLocal(t *testing.T) { - tmpDir := t.TempDir() - ctx := context.Background() - ss := specs.Source{ - Name: "test", - Version: "vtest", - Path: "test", - Backend: specs.BackendLocal, - BackendSpec: Spec{ - Path: tmpDir, - }, - } - local, err := New(ss) - if err != nil { - t.Fatalf("failed to create local backend: %v", err) - } - if local.spec.Path != tmpDir { - t.Fatalf("expected path to be %s, but got %s", tmpDir, local.spec.Path) - } - - tableName := "test_table" - clientID := "test_client" - got, err := local.Get(ctx, tableName, clientID) - if err != nil { - t.Fatalf("failed to get value: %v", err) - } - if got != "" { - t.Fatalf("expected empty value, but got %s", got) - } - - err = local.Set(ctx, tableName, clientID, "test_value") - if err != nil { - t.Fatalf("failed to set value: %v", err) - } - - got, err = local.Get(ctx, tableName, clientID) - if err != nil { - t.Fatalf("failed to get value after setting it: %v", err) - } - if got != "test_value" { - t.Fatalf("expected value to be test_value, but got %s", got) - } - - err = local.Close(ctx) - if err != nil { - t.Fatalf("failed to close local backend: %v", err) - } - - local, err = New(ss) - if err != nil { - t.Fatalf("failed to open local backend after closing it: %v", err) - } - - got, err = local.Get(ctx, tableName, clientID) - if err != nil { - t.Fatalf("failed to get value after closing and reopening local backend: %v", err) - } - if got != "test_value" { - t.Fatalf("expected value to be test_value, but got %s", got) - } - - got, err = local.Get(ctx, "some_other_table", clientID) - if err != nil { - t.Fatalf("failed to get value after closing and reopening local backend: %v", err) - } - if got != "" { - t.Fatalf("expected empty value for some_other_table -> test_key, but got %s", got) - } - err = local.Close(ctx) - if err != nil { - t.Fatalf("failed to close local backend the second time: %v", err) - } - - // check that state is namespaced by source name - ss.Name = "test2" - local2, err := New(ss) - if err != nil { - t.Fatalf("failed to create local backend for test2: %v", err) - } - - got, err = local2.Get(ctx, "test_table", clientID) - if err != nil { - t.Fatalf("failed to get value for local backend test2: %v", err) - } - if got != "" { - t.Fatalf("expected empty value for test2 -> test_table -> test_key, but got %s", got) - } - err = local2.Close(ctx) - if err != nil { - t.Fatalf("failed to close second local backend: %v", err) - } -} diff --git a/internal/servers/discovery/v1/discovery.go b/internal/servers/discovery/v1/discovery.go new file mode 100644 index 0000000000..47c0197ec7 --- /dev/null +++ b/internal/servers/discovery/v1/discovery.go @@ -0,0 +1,16 @@ +package discovery + +import ( + "context" + + pb "github.com/cloudquery/plugin-pb-go/pb/discovery/v1" +) + +type Server struct { + pb.UnimplementedDiscoveryServer + Versions []uint64 +} + +func (s *Server) GetVersions(context.Context, *pb.GetVersions_Request) (*pb.GetVersions_Response, error) { + return &pb.GetVersions_Response{Versions: s.Versions}, nil +} diff --git a/internal/servers/plugin/v3/plugin.go b/internal/servers/plugin/v3/plugin.go index bbeb968859..b50de13253 100644 --- a/internal/servers/plugin/v3/plugin.go +++ b/internal/servers/plugin/v3/plugin.go @@ -12,6 +12,7 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/ipc" + "github.com/cloudquery/plugin-pb-go/managedplugin" pb "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" "github.com/cloudquery/plugin-sdk/v4/plugin" "github.com/cloudquery/plugin-sdk/v4/schema" @@ -29,6 +30,8 @@ type Server struct { pb.UnimplementedPluginServer Plugin *plugin.Plugin Logger zerolog.Logger + Directory string + NoSentry bool } func (s *Server) GetStaticTables(context.Context, *pb.GetStaticTables_Request) (*pb.GetStaticTables_Response, error) { @@ -92,6 +95,29 @@ func (s *Server) Sync(req *pb.Sync_Request, stream pb.Plugin_SyncServer) error { sourceName := req.SourceName + if req.StateBackend != nil { + opts := []managedplugin.Option{ + managedplugin.WithLogger(s.Logger), + managedplugin.WithDirectory(s.Directory), + } + if s.NoSentry { + opts = append(opts, managedplugin.WithNoSentry()) + } + statePlugin, err := managedplugin.NewClient(ctx, managedplugin.Config{ + Path: req.StateBackend.Path, + Registry: managedplugin.Registry(req.StateBackend.Registry), + Version: req.StateBackend.Version, + }, opts...) + if err != nil { + return status.Errorf(codes.Internal, "failed to create state plugin: %v", err) + } + stateClient, err := newStateClient(ctx, statePlugin.Conn, *req.StateBackend) + if err != nil { + return status.Errorf(codes.Internal, "failed to create state client: %v", err) + } + syncOptions.StateBackend = stateClient + } + go func() { defer close(records) err := s.Plugin.Sync(ctx, sourceName, req.SyncTime.AsTime(), syncOptions, records) @@ -172,7 +198,6 @@ func (s *Server) Migrate(ctx context.Context, req *pb.Migrate_Request) (*pb.Migr case pb.MIGRATE_MODE_FORCE: migrateMode = plugin.MigrateModeForced } - // switch req. return &pb.Migrate_Response{}, s.Plugin.Migrate(ctx, tables, migrateMode) } @@ -317,3 +342,7 @@ func setCQIDAsPrimaryKeysForTables(tables schema.Tables) { setCQIDAsPrimaryKeysForTables(table.Relations) } } + +func (s *Server) Close(ctx context.Context, _ *pb.Close_Request) (*pb.Close_Response, error) { + return &pb.Close_Response{}, s.Plugin.Close(ctx) +} \ No newline at end of file diff --git a/internal/servers/plugin/v3/state.go b/internal/servers/plugin/v3/state.go new file mode 100644 index 0000000000..4b7a43c297 --- /dev/null +++ b/internal/servers/plugin/v3/state.go @@ -0,0 +1,165 @@ +package plugin + +import ( + "bytes" + "context" + "fmt" + "io" + + "github.com/apache/arrow/go/v13/arrow" + "github.com/apache/arrow/go/v13/arrow/array" + "github.com/apache/arrow/go/v13/arrow/ipc" + "github.com/apache/arrow/go/v13/arrow/memory" + pbDiscovery "github.com/cloudquery/plugin-pb-go/pb/discovery/v1" + pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" + "github.com/cloudquery/plugin-sdk/v4/schema" + "github.com/cloudquery/plugin-sdk/v4/state" + "golang.org/x/exp/slices" + "google.golang.org/grpc" +) + +const stateTablePrefix = "cq_state_" +const keyColumn = "key" +const valueColumn = "value" + +type ClientV3 struct { + client pbPlugin.PluginClient + encodedTables [][]byte + mem map[string]string + keys []string + values []string +} + +func newStateClient(ctx context.Context, conn *grpc.ClientConn, spec pbPlugin.StateBackendSpec) (state.Client, error) { + discoveryClient := pbDiscovery.NewDiscoveryClient(conn) + versions, err := discoveryClient.GetVersions(ctx, &pbDiscovery.GetVersions_Request{}) + if err != nil { + return nil, err + } + if !slices.Contains(versions.Versions, 3) { + return nil, fmt.Errorf("please upgrade your state backend plugin") + } + + c := &ClientV3{ + client: pbPlugin.NewPluginClient(conn), + mem: make(map[string]string), + keys: make([]string, 0), + values: make([]string, 0), + } + name := spec.Name + table := &schema.Table{ + Name: stateTablePrefix + name, + Columns: []schema.Column{ + { + Name: keyColumn, + Type: arrow.BinaryTypes.String, + PrimaryKey: true, + }, + { + Name: valueColumn, + Type: arrow.BinaryTypes.String, + }, + }, + } + tables := schema.Tables{table} + c.encodedTables, err = tables.ToArrowSchemas().Encode() + if err != nil { + return nil, err + } + + if _, err := c.client.Init(ctx, &pbPlugin.Init_Request{ + Spec: spec.Spec, + }); err != nil { + return nil, err + } + + if _, err := c.client.Migrate(ctx, &pbPlugin.Migrate_Request{ + Tables: c.encodedTables, + MigrateMode: pbPlugin.MIGRATE_MODE_SAFE, + }); err != nil { + return nil, err + } + + syncClient, err := c.client.Sync(ctx, &pbPlugin.Sync_Request{ + Tables: []string{stateTablePrefix + name}, + }) + if err != nil { + return nil, err + } + for { + res, err := syncClient.Recv() + if err != nil { + if err == io.EOF { + break + } + return nil, err + } + rdr, err := ipc.NewReader(bytes.NewReader(res.Resource)) + if err != nil { + return nil, err + } + for { + record, err := rdr.Read() + if err != nil { + if err == io.EOF { + break + } + return nil, err + } + keys := record.Columns()[0].(*array.String) + values := record.Columns()[1].(*array.String) + for i := 0; i < keys.Len(); i++ { + c.mem[keys.Value(i)] = values.Value(i) + } + } + } + return c, nil +} + + +func (c *ClientV3) SetKey(ctx context.Context, key string, value string) error { + c.mem[key] = value + return nil +} + +func (c *ClientV3) flush(ctx context.Context) error { + bldr := array.NewRecordBuilder(memory.DefaultAllocator, nil) + for k, v := range c.mem { + bldr.Field(0).(*array.StringBuilder).Append(k) + bldr.Field(1).(*array.StringBuilder).Append(v) + } + rec := bldr.NewRecord() + var buf bytes.Buffer + wrtr := ipc.NewWriter(&buf, ipc.WithSchema(rec.Schema())) + if err := wrtr.Write(rec); err != nil { + return err + } + if err := wrtr.Close(); err != nil { + return err + } + writeClient, err := c.client.Write(ctx) + if err != nil { + return err + } + if err := writeClient.Send(&pbPlugin.Write_Request{ + WriteMode: pbPlugin.WRITE_MODE_WRITE_MODE_OVERWRITE, + }); err != nil { + return err + } + if err := writeClient.Send(&pbPlugin.Write_Request{ + Resource: buf.Bytes(), + }); err != nil { + return err + } + if _, err := writeClient.CloseAndRecv(); err != nil { + return err + } + return nil +} + +func (c *ClientV3) GetKey(ctx context.Context, key string) (string, error) { + if val, ok := c.mem[key]; ok { + return val, nil + } + return "", fmt.Errorf("key not found") +} \ No newline at end of file diff --git a/internal/state/state.go b/internal/state/state.go deleted file mode 100644 index 1dc00b79f9..0000000000 --- a/internal/state/state.go +++ /dev/null @@ -1,27 +0,0 @@ -package state - -import "context" - -type Client struct { - // managedPlugin managedplugin.Client -} - -// func NewState(ctx context.Context, managedPlugin managedplugin.Client) *Client { -// return &Client{ -// managedPlugin: managedPlugin, -// } -// c := pbPlugin.NewPluginClient(managedPlugin.Conn) -// c.Write(ctx, ) -// } - -func NewState(spec any) *Client { - return &Client{} -} - -func (* Client) SetKey(ctx context.Context, key string, value string) error { - return nil -} - -func (* Client) GetKey(ctx context.Context, key string) (string, error) { - return "", nil -} \ No newline at end of file diff --git a/plugin/memdb.go b/plugin/memdb.go index 4a99073935..8c23b430a3 100644 --- a/plugin/memdb.go +++ b/plugin/memdb.go @@ -84,7 +84,11 @@ func (c *client) ID() string { return "testDestinationMemDB" } -func (c *client) Sync(ctx context.Context, res chan<- arrow.Record) error { +func (c *client) NewManagedSyncClient(context.Context, SyncOptions) (ManagedSyncClient, error) { + return nil, fmt.Errorf("not supported") +} + +func (c *client) Sync(ctx context.Context, options SyncOptions, res chan<- arrow.Record) error { c.memoryDBLock.RLock() for tableName := range c.memoryDB { for _, row := range c.memoryDB[tableName] { diff --git a/plugin/options.go b/plugin/options.go index fa2b3b53f2..6ad134b843 100644 --- a/plugin/options.go +++ b/plugin/options.go @@ -3,6 +3,7 @@ package plugin import ( "bytes" "context" + "fmt" "time" "github.com/cloudquery/plugin-sdk/v4/schema" @@ -23,6 +24,32 @@ func (m MigrateMode) String() string { return migrateModeStrings[m] } +type Registry int + +const ( + RegistryGithub Registry = iota + RegistryLocal + RegistryGrpc +) + +func (r Registry) String() string { + return [...]string{"github", "local", "grpc"}[r] +} + +func RegistryFromString(s string) (Registry, error) { + switch s { + case "github": + return RegistryGithub, nil + case "local": + return RegistryLocal, nil + case "grpc": + return RegistryGrpc, nil + default: + return RegistryGithub, fmt.Errorf("unknown registry %s", s) + } +} + + type WriteMode int const ( diff --git a/plugin/plugin.go b/plugin/plugin.go index 754fd424d6..aa456bdaac 100644 --- a/plugin/plugin.go +++ b/plugin/plugin.go @@ -29,9 +29,13 @@ type NewExecutionClientFunc func(context.Context, zerolog.Logger, specs.Source, type NewClientFunc func(context.Context, zerolog.Logger, any) (Client, error) -type Client interface { +type ManagedSyncClient interface { ID() string - Sync(ctx context.Context, res chan<- arrow.Record) error +} + +type Client interface { + NewManagedSyncClient(ctx context.Context, options SyncOptions) (ManagedSyncClient, error) + Sync(ctx context.Context, options SyncOptions, res chan<- arrow.Record) error Migrate(ctx context.Context, tables schema.Tables, migrateMode MigrateMode) error WriteTableBatch(ctx context.Context, table *schema.Table, writeMode WriteMode, data []arrow.Record) error Write(ctx context.Context, tables schema.Tables, writeMode WriteMode, res <-chan arrow.Record) error @@ -42,15 +46,15 @@ type Client interface { type UnimplementedWriter struct{} -func (UnimplementedWriter) Migrate(ctx context.Context, tables schema.Tables) error { +func (UnimplementedWriter) Migrate(ctx context.Context, tables schema.Tables, migrateMode MigrateMode) error { return fmt.Errorf("not implemented") } -func (UnimplementedWriter) Write(ctx context.Context, tables schema.Tables, res <-chan arrow.Record) error { +func (UnimplementedWriter) Write(ctx context.Context, tables schema.Tables, writeMode WriteMode, res <-chan arrow.Record) error { return fmt.Errorf("not implemented") } -func (UnimplementedWriter) WriteTableBatch(ctx context.Context, table *schema.Table, data []arrow.Record) error { +func (UnimplementedWriter) WriteTableBatch(ctx context.Context, table *schema.Table, writeMode WriteMode, data []arrow.Record) error { return fmt.Errorf("not implemented") } @@ -60,7 +64,7 @@ func (UnimplementedWriter) DeleteStale(ctx context.Context, tables schema.Tables type UnimplementedSync struct{} -func (UnimplementedSync) Sync(ctx context.Context, res chan<- arrow.Record) error { +func (UnimplementedSync) Sync(ctx context.Context, options SyncOptions, res chan<- arrow.Record) error { return fmt.Errorf("not implemented") } diff --git a/plugin/plugin_managed_sync.go b/plugin/plugin_managed_sync.go new file mode 100644 index 0000000000..2f52685513 --- /dev/null +++ b/plugin/plugin_managed_sync.go @@ -0,0 +1,45 @@ +package plugin + +import ( + "context" + "fmt" + "time" + + "github.com/apache/arrow/go/v13/arrow" + "github.com/apache/arrow/go/v13/arrow/array" + "github.com/apache/arrow/go/v13/arrow/memory" + "github.com/cloudquery/plugin-sdk/v4/scalar" + "github.com/cloudquery/plugin-sdk/v4/schema" +) + +func (p *Plugin) managedSync(ctx context.Context, sourceName string, syncTime time.Time, options SyncOptions, res chan<- arrow.Record) error { + if len(p.sessionTables) == 0 { + return fmt.Errorf("no tables to sync - please check your spec 'tables' and 'skip_tables' settings") + } + + managedClient, err := p.client.NewManagedSyncClient(ctx, options) + if err != nil { + return fmt.Errorf("failed to create managed sync client: %w", err) + } + + resources := make(chan *schema.Resource) + go func() { + defer close(resources) + switch options.Scheduler { + case SchedulerDFS: + p.syncDfs(ctx, options, managedClient, p.sessionTables, resources) + case SchedulerRoundRobin: + p.syncRoundRobin(ctx, options, managedClient, p.sessionTables, resources) + default: + panic(fmt.Errorf("unknown scheduler %s", options.Scheduler)) + } + }() + for resource := range resources { + vector := resource.GetValues() + bldr := array.NewRecordBuilder(memory.DefaultAllocator, resource.Table.ToArrowSchema()) + scalar.AppendToRecordBuilder(bldr, vector) + rec := bldr.NewRecord() + res <- rec + } + return nil +} \ No newline at end of file diff --git a/plugin/plugin_reader.go b/plugin/plugin_reader.go index 8fadf11f6a..bb59759c0c 100644 --- a/plugin/plugin_reader.go +++ b/plugin/plugin_reader.go @@ -6,18 +6,20 @@ import ( "time" "github.com/apache/arrow/go/v13/arrow" - "github.com/apache/arrow/go/v13/arrow/array" - "github.com/apache/arrow/go/v13/arrow/memory" - "github.com/cloudquery/plugin-sdk/v4/scalar" "github.com/cloudquery/plugin-sdk/v4/schema" + "github.com/cloudquery/plugin-sdk/v4/state" + "github.com/google/uuid" ) + + type SyncOptions struct { Tables []string SkipTables []string Concurrency int64 Scheduler Scheduler DeterministicCQID bool + StateBackend state.Client } // Tables returns all tables supported by this source plugin @@ -52,6 +54,10 @@ func (p *Plugin) Read(ctx context.Context, table *schema.Table, sourceName strin return p.client.Read(ctx, table, sourceName, res) } +func (p *Plugin) Acknowledge(ctx context.Context, recordUUID uuid.UUID) error { + return nil +} + func (p *Plugin) syncAll(ctx context.Context, sourceName string, syncTime time.Time, options SyncOptions) ([]arrow.Record, error) { var err error ch := make(chan arrow.Record) @@ -68,40 +74,21 @@ func (p *Plugin) syncAll(ctx context.Context, sourceName string, syncTime time.T } // Sync is syncing data from the requested tables in spec to the given channel -func (p *Plugin) Sync(ctx context.Context, sourceName string, syncTime time.Time, syncOptions SyncOptions, res chan<- arrow.Record) error { +func (p *Plugin) Sync(ctx context.Context, sourceName string, syncTime time.Time, options SyncOptions, res chan<- arrow.Record) error { if !p.mu.TryLock() { return fmt.Errorf("plugin already in use") } defer p.mu.Unlock() p.syncTime = syncTime - startTime := time.Now() + if p.unmanagedSync { - if err := p.client.Sync(ctx, res); err != nil { + if err := p.client.Sync(ctx, options, res); err != nil { return fmt.Errorf("failed to sync unmanaged client: %w", err) } } else { - if len(p.sessionTables) == 0 { - return fmt.Errorf("no tables to sync - please check your spec 'tables' and 'skip_tables' settings") - } - resources := make(chan *schema.Resource) - go func() { - defer close(resources) - switch syncOptions.Scheduler { - case SchedulerDFS: - p.syncDfs(ctx, syncOptions, p.client, p.sessionTables, resources) - case SchedulerRoundRobin: - p.syncRoundRobin(ctx, syncOptions, p.client, p.sessionTables, resources) - default: - panic(fmt.Errorf("unknown scheduler %s", syncOptions.Scheduler)) - } - }() - for resource := range resources { - vector := resource.GetValues() - bldr := array.NewRecordBuilder(memory.DefaultAllocator, resource.Table.ToArrowSchema()) - scalar.AppendToRecordBuilder(bldr, vector) - rec := bldr.NewRecord() - res <- rec + if err := p.managedSync(ctx, sourceName, syncTime, options, res); err != nil { + return fmt.Errorf("failed to sync managed client: %w", err) } } diff --git a/plugin/plugin_writer.go b/plugin/plugin_writer.go index 4f13e1bdab..af37b8df48 100644 --- a/plugin/plugin_writer.go +++ b/plugin/plugin_writer.go @@ -10,6 +10,9 @@ import ( ) func (p *Plugin) Migrate(ctx context.Context, tables schema.Tables, migrateMode MigrateMode) error { + if p.client == nil { + return fmt.Errorf("plugin is not initialized") + } return p.client.Migrate(ctx, tables, migrateMode) } diff --git a/plugin/scheduler_dfs.go b/plugin/scheduler_dfs.go index ae074503ab..bd87c50aeb 100644 --- a/plugin/scheduler_dfs.go +++ b/plugin/scheduler_dfs.go @@ -14,7 +14,7 @@ import ( "golang.org/x/sync/semaphore" ) -func (p *Plugin) syncDfs(ctx context.Context, options SyncOptions, client Client, tables schema.Tables, resolvedResources chan<- *schema.Resource) { +func (p *Plugin) syncDfs(ctx context.Context, options SyncOptions, client ManagedSyncClient, tables schema.Tables, resolvedResources chan<- *schema.Resource) { // This is very similar to the concurrent web crawler problem with some minor changes. // We are using DFS to make sure memory usage is capped at O(h) where h is the height of the tree. tableConcurrency := max(uint64(options.Concurrency/minResourceConcurrency), minTableConcurrency) diff --git a/plugin/scheduler_round_robin.go b/plugin/scheduler_round_robin.go index b4c7592fcf..a0be17938d 100644 --- a/plugin/scheduler_round_robin.go +++ b/plugin/scheduler_round_robin.go @@ -13,7 +13,7 @@ type tableClient struct { client schema.ClientMeta } -func (p *Plugin) syncRoundRobin(ctx context.Context, options SyncOptions, client Client, tables schema.Tables, resolvedResources chan<- *schema.Resource) { +func (p *Plugin) syncRoundRobin(ctx context.Context, options SyncOptions, client ManagedSyncClient, tables schema.Tables, resolvedResources chan<- *schema.Resource) { tableConcurrency := max(uint64(options.Concurrency/minResourceConcurrency), minTableConcurrency) resourceConcurrency := tableConcurrency * minResourceConcurrency diff --git a/plugin/state.go b/plugin/state.go new file mode 100644 index 0000000000..6831e406a6 --- /dev/null +++ b/plugin/state.go @@ -0,0 +1 @@ +package plugin \ No newline at end of file diff --git a/serve/plugin.go b/serve/plugin.go index f4dbfe9e5b..483312b871 100644 --- a/serve/plugin.go +++ b/serve/plugin.go @@ -15,8 +15,10 @@ import ( pbDestinationV0 "github.com/cloudquery/plugin-pb-go/pb/destination/v0" pbDestinationV1 "github.com/cloudquery/plugin-pb-go/pb/destination/v1" pbdiscoveryv0 "github.com/cloudquery/plugin-pb-go/pb/discovery/v0" + pbdiscoveryv1 "github.com/cloudquery/plugin-pb-go/pb/discovery/v1" pbv3 "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" discoveryServerV0 "github.com/cloudquery/plugin-sdk/v4/internal/servers/discovery/v0" + discoveryServerV1 "github.com/cloudquery/plugin-sdk/v4/internal/servers/discovery/v1" serverDestinationV0 "github.com/cloudquery/plugin-sdk/v4/internal/servers/destination/v0" serverDestinationV1 "github.com/cloudquery/plugin-sdk/v4/internal/servers/destination/v1" @@ -159,6 +161,7 @@ func (s *PluginServe) newCmdPluginServe() *cobra.Command { pbv3.RegisterPluginServer(grpcServer, &serversv3.Server{ Plugin: s.plugin, Logger: logger, + NoSentry: noSentry, }) if s.destinationV0V1Server { pbDestinationV1.RegisterDestinationServer(grpcServer, &serverDestinationV1.Server{ @@ -173,6 +176,9 @@ func (s *PluginServe) newCmdPluginServe() *cobra.Command { pbdiscoveryv0.RegisterDiscoveryServer(grpcServer, &discoveryServerV0.Server{ Versions: []string{"v0", "v1", "v2", "v3"}, }) + pbdiscoveryv1.RegisterDiscoveryServer(grpcServer, &discoveryServerV1.Server{ + Versions: []uint64{0,1,2,3}, + }) version := s.plugin.Version() diff --git a/serve/plugin_test.go b/serve/plugin_test.go index 9135012f0c..e09308b704 100644 --- a/serve/plugin_test.go +++ b/serve/plugin_test.go @@ -58,6 +58,10 @@ func (*testExecutionClient) Close(ctx context.Context) error { return nil } +func (c *testExecutionClient) NewManagedSyncClient(ctx context.Context, options plugin.SyncOptions) (plugin.ManagedSyncClient, error) { + return c, nil +} + func newTestExecutionClient(context.Context, zerolog.Logger, any) (plugin.Client, error) { return &testExecutionClient{}, nil } diff --git a/serve/state_v3_test.go b/serve/state_v3_test.go new file mode 100644 index 0000000000..d1442c2e88 --- /dev/null +++ b/serve/state_v3_test.go @@ -0,0 +1,57 @@ +package serve + +import ( + "context" + "sync" + "testing" + + "github.com/cloudquery/plugin-sdk/v4/internal/state" + "github.com/cloudquery/plugin-sdk/v4/plugin" + "google.golang.org/grpc" + "google.golang.org/grpc/credentials/insecure" +) + +func TestStateV3(t *testing.T) { + p := plugin.NewPlugin("memdb", "v1.0.0", plugin.NewMemDBClient) + srv := Plugin(p, WithArgs("serve"), WithTestListener()) + ctx := context.Background() + ctx, cancel := context.WithCancel(ctx) + var wg sync.WaitGroup + wg.Add(1) + var serverErr error + go func() { + defer wg.Done() + serverErr = srv.Serve(ctx) + }() + defer func() { + cancel() + wg.Wait() + }() + + // https://stackoverflow.com/questions/42102496/testing-a-grpc-service + conn, err := grpc.DialContext(ctx, "bufnet", grpc.WithContextDialer(srv.bufPluginDialer), grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithBlock()) + if err != nil { + t.Fatalf("Failed to dial bufnet: %v", err) + } + + stateClient, err := state.NewClient(ctx, "test", conn) + if err != nil { + t.Fatalf("Failed to create state client: %v", err) + } + if err := stateClient.SetKey(ctx, "testKey", "testValue"); err != nil { + t.Fatalf("Failed to set key: %v", err) + } + key, err := stateClient.GetKey(ctx, "testKey") + if err != nil { + t.Fatalf("Failed to get key: %v", err) + } + if key != "testValue" { + t.Fatalf("Unexpected key value: %v", key) + } + + cancel() + wg.Wait() + if serverErr != nil { + t.Fatal(serverErr) + } +} \ No newline at end of file diff --git a/state/state.go b/state/state.go new file mode 100644 index 0000000000..e92423df3c --- /dev/null +++ b/state/state.go @@ -0,0 +1,8 @@ +package state + +import "context" + +type Client interface { + SetKey(ctx context.Context, key string, value string) error + GetKey(ctx context.Context, key string) (string, error) +} \ No newline at end of file From 04f8c28bbee97281063459f18ee5e7f6b1d79e79 Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Sun, 4 Jun 2023 22:25:54 +0300 Subject: [PATCH 009/125] more wip --- backend/backend.go | 12 -- internal/backends/local/local.go | 157 ------------------ internal/backends/local/spec.go | 12 -- internal/backends/nop/nop.go | 23 --- .../servers/destination/v0/destinations.go | 4 +- .../servers/destination/v1/destinations.go | 2 +- internal/servers/plugin/v3/plugin.go | 25 ++- internal/servers/plugin/v3/state.go | 23 ++- plugin/memdb_test.go | 34 ++-- plugin/options.go | 5 +- plugin/plugin.go | 19 +-- plugin/plugin_managed_source_test.go | 5 +- plugin/plugin_managed_sync.go | 5 +- plugin/plugin_reader.go | 97 +++++++---- plugin/plugin_test.go | 12 +- plugin/state.go | 1 - plugin/testing_overwrite_deletestale.go | 30 +++- plugin/testing_sync.go | 3 +- plugin/testing_write.go | 4 +- plugin/testing_write_append.go | 6 +- plugin/testing_write_migrate.go | 15 +- plugin/testing_write_overwrite.go | 12 +- serve/destination_v0_test.go | 4 +- serve/destination_v1_test.go | 4 +- serve/plugin.go | 6 +- serve/state_v3_test.go | 2 +- state/state.go | 2 +- 27 files changed, 194 insertions(+), 330 deletions(-) delete mode 100644 backend/backend.go delete mode 100644 internal/backends/local/local.go delete mode 100644 internal/backends/local/spec.go delete mode 100644 internal/backends/nop/nop.go delete mode 100644 plugin/state.go diff --git a/backend/backend.go b/backend/backend.go deleted file mode 100644 index fc4e639233..0000000000 --- a/backend/backend.go +++ /dev/null @@ -1,12 +0,0 @@ -package backend - -import "context" - -type Backend interface { - // Set sets the value for the given table and client id. - Set(ctx context.Context, table, clientID, value string) error - // Get returns the value for the given table and client id. - Get(ctx context.Context, table, clientID string) (string, error) - // Close closes the backend. - Close(ctx context.Context) error -} diff --git a/internal/backends/local/local.go b/internal/backends/local/local.go deleted file mode 100644 index f593260dde..0000000000 --- a/internal/backends/local/local.go +++ /dev/null @@ -1,157 +0,0 @@ -package local - -import ( - "context" - "encoding/json" - "fmt" - "io" - "os" - "path" - "strings" - "sync" - - "github.com/cloudquery/plugin-pb-go/specs/v0" -) - -type Local struct { - sourceName string - spec Spec - tables map[string]entries // table -> key -> value - tablesLock sync.RWMutex -} - -type entries map[string]string - -func New(sourceSpec specs.Source) (*Local, error) { - spec := Spec{} - err := sourceSpec.UnmarshalBackendSpec(&spec) - if err != nil { - return nil, err - } - spec.SetDefaults() - - l := &Local{ - sourceName: sourceSpec.Name, - spec: spec, - } - tables, err := l.loadPreviousState() - if err != nil { - return nil, err - } - if tables == nil { - tables = map[string]entries{} - } - l.tables = tables - return l, nil -} - -func (l *Local) loadPreviousState() (map[string]entries, error) { - files, err := os.ReadDir(l.spec.Path) - if os.IsNotExist(err) { - return nil, nil - } - var tables = map[string]entries{} - for _, f := range files { - if f.IsDir() || !f.Type().IsRegular() { - continue - } - name := f.Name() - if !strings.HasSuffix(name, ".json") || !strings.HasPrefix(name, l.sourceName+"-") { - continue - } - table, kv, err := l.readFile(name) - if err != nil { - return nil, err - } - tables[table] = kv - } - return tables, nil -} - -func (l *Local) readFile(name string) (table string, kv entries, err error) { - p := path.Join(l.spec.Path, name) - f, err := os.Open(p) - if err != nil { - return "", nil, fmt.Errorf("failed to open state file: %w", err) - } - b, err := io.ReadAll(f) - if err != nil { - return "", nil, fmt.Errorf("failed to read state file: %w", err) - } - err = f.Close() - if err != nil { - return "", nil, fmt.Errorf("failed to close state file: %w", err) - } - err = json.Unmarshal(b, &kv) - if err != nil { - return "", nil, fmt.Errorf("failed to unmarshal state file: %w", err) - } - table = strings.TrimPrefix(strings.TrimSuffix(name, ".json"), l.sourceName+"-") - return table, kv, nil -} - -func (l *Local) Get(_ context.Context, table, clientID string) (string, error) { - l.tablesLock.RLock() - defer l.tablesLock.RUnlock() - - if _, ok := l.tables[table]; !ok { - return "", nil - } - return l.tables[table][clientID], nil -} - -func (l *Local) Set(_ context.Context, table, clientID, value string) error { - l.tablesLock.Lock() - defer l.tablesLock.Unlock() - - if _, ok := l.tables[table]; !ok { - l.tables[table] = map[string]string{} - } - prev := l.tables[table][clientID] - l.tables[table][clientID] = value - if prev != value { - // only flush if the value changed - return l.flushTable(table, l.tables[table]) - } - return nil -} - -func (l *Local) Close(_ context.Context) error { - l.tablesLock.RLock() - defer l.tablesLock.RUnlock() - - return l.flush() -} - -func (l *Local) flush() error { - for table, kv := range l.tables { - err := l.flushTable(table, kv) - if err != nil { - return err - } - } - return nil -} - -func (l *Local) flushTable(table string, entries entries) error { - if len(entries) == 0 { - return nil - } - - err := os.MkdirAll(l.spec.Path, 0755) - if err != nil { - return fmt.Errorf("failed to create state directory %v: %w", l.spec.Path, err) - } - - b, err := json.MarshalIndent(entries, "", " ") - if err != nil { - return fmt.Errorf("failed to marshal state for table %v: %w", table, err) - } - f := path.Join(l.spec.Path, l.sourceName+"-"+table+".json") - err = os.WriteFile(f, b, 0644) - if err != nil { - return fmt.Errorf("failed to write state for table %v: %w", table, err) - } - - return nil -} diff --git a/internal/backends/local/spec.go b/internal/backends/local/spec.go deleted file mode 100644 index f2b7040c1d..0000000000 --- a/internal/backends/local/spec.go +++ /dev/null @@ -1,12 +0,0 @@ -package local - -type Spec struct { - // Path is the path to the local directory. - Path string `json:"path"` -} - -func (s *Spec) SetDefaults() { - if s.Path == "" { - s.Path = ".cq/state" - } -} diff --git a/internal/backends/nop/nop.go b/internal/backends/nop/nop.go deleted file mode 100644 index 45e713608a..0000000000 --- a/internal/backends/nop/nop.go +++ /dev/null @@ -1,23 +0,0 @@ -package nop - -import "context" - -func New() *Backend { - return &Backend{} -} - -// Backend can be used in cases where no backend is specified to avoid the need to check for nil -// pointers in all resolvers. -type Backend struct{} - -func (*Backend) Set(_ context.Context, _, _, _ string) error { - return nil -} - -func (*Backend) Get(_ context.Context, _, _ string) (string, error) { - return "", nil -} - -func (*Backend) Close(_ context.Context) error { - return nil -} diff --git a/internal/servers/destination/v0/destinations.go b/internal/servers/destination/v0/destinations.go index 93fe380574..7419cf10f2 100644 --- a/internal/servers/destination/v0/destinations.go +++ b/internal/servers/destination/v0/destinations.go @@ -53,7 +53,7 @@ func (s *Server) Configure(ctx context.Context, req *pbBase.Configure_Request) ( case specs.MigrateModeSafe: s.migrateMode = plugin.MigrateModeSafe case specs.MigrateModeForced: - s.migrateMode = plugin.MigrateModeForced + s.migrateMode = plugin.MigrateModeForce } return &pbBase.Configure_Response{}, s.Plugin.Init(ctx, nil) } @@ -84,7 +84,7 @@ func (s *Server) Migrate(ctx context.Context, req *pb.Migrate_Request) (*pb.Migr case specs.MigrateModeSafe: migrateMode = plugin.MigrateModeSafe case specs.MigrateModeForced: - migrateMode = plugin.MigrateModeForced + migrateMode = plugin.MigrateModeForce default: return nil, status.Errorf(codes.InvalidArgument, "invalid migrate mode: %v", s.spec.MigrateMode) } diff --git a/internal/servers/destination/v1/destinations.go b/internal/servers/destination/v1/destinations.go index c65c6b4256..d53e5e2ee9 100644 --- a/internal/servers/destination/v1/destinations.go +++ b/internal/servers/destination/v1/destinations.go @@ -46,7 +46,7 @@ func (s *Server) Configure(ctx context.Context, req *pb.Configure_Request) (*pb. case specs.MigrateModeSafe: s.migrateMode = plugin.MigrateModeSafe case specs.MigrateModeForced: - s.migrateMode = plugin.MigrateModeForced + s.migrateMode = plugin.MigrateModeForce } return &pb.Configure_Response{}, s.Plugin.Init(ctx, s.spec.Spec) } diff --git a/internal/servers/plugin/v3/plugin.go b/internal/servers/plugin/v3/plugin.go index b50de13253..53b02b1cd5 100644 --- a/internal/servers/plugin/v3/plugin.go +++ b/internal/servers/plugin/v3/plugin.go @@ -28,10 +28,10 @@ const MaxMsgSize = 100 * 1024 * 1024 // 100 MiB type Server struct { pb.UnimplementedPluginServer - Plugin *plugin.Plugin - Logger zerolog.Logger + Plugin *plugin.Plugin + Logger zerolog.Logger Directory string - NoSentry bool + NoSentry bool } func (s *Server) GetStaticTables(context.Context, *pb.GetStaticTables_Request) (*pb.GetStaticTables_Response, error) { @@ -93,7 +93,7 @@ func (s *Server) Sync(req *pb.Sync_Request, stream pb.Plugin_SyncServer) error { syncOptions.Scheduler = plugin.SchedulerRoundRobin } - sourceName := req.SourceName + // sourceName := req.SourceName if req.StateBackend != nil { opts := []managedplugin.Option{ @@ -104,9 +104,9 @@ func (s *Server) Sync(req *pb.Sync_Request, stream pb.Plugin_SyncServer) error { opts = append(opts, managedplugin.WithNoSentry()) } statePlugin, err := managedplugin.NewClient(ctx, managedplugin.Config{ - Path: req.StateBackend.Path, + Path: req.StateBackend.Path, Registry: managedplugin.Registry(req.StateBackend.Registry), - Version: req.StateBackend.Version, + Version: req.StateBackend.Version, }, opts...) if err != nil { return status.Errorf(codes.Internal, "failed to create state plugin: %v", err) @@ -117,10 +117,17 @@ func (s *Server) Sync(req *pb.Sync_Request, stream pb.Plugin_SyncServer) error { } syncOptions.StateBackend = stateClient } + if req.SyncTime != nil { + syncOptions.SyncTime = req.SyncTime.AsTime() + } + + if req.SourceName != "" { + syncOptions.SourceName = req.SourceName + } go func() { defer close(records) - err := s.Plugin.Sync(ctx, sourceName, req.SyncTime.AsTime(), syncOptions, records) + err := s.Plugin.Sync(ctx, syncOptions, records) if err != nil { syncErr = fmt.Errorf("failed to sync records: %w", err) } @@ -196,7 +203,7 @@ func (s *Server) Migrate(ctx context.Context, req *pb.Migrate_Request) (*pb.Migr case pb.MIGRATE_MODE_SAFE: migrateMode = plugin.MigrateModeSafe case pb.MIGRATE_MODE_FORCE: - migrateMode = plugin.MigrateModeForced + migrateMode = plugin.MigrateModeForce } return &pb.Migrate_Response{}, s.Plugin.Migrate(ctx, tables, migrateMode) } @@ -345,4 +352,4 @@ func setCQIDAsPrimaryKeysForTables(tables schema.Tables) { func (s *Server) Close(ctx context.Context, _ *pb.Close_Request) (*pb.Close_Response, error) { return &pb.Close_Response{}, s.Plugin.Close(ctx) -} \ No newline at end of file +} diff --git a/internal/servers/plugin/v3/state.go b/internal/servers/plugin/v3/state.go index 4b7a43c297..be152297b7 100644 --- a/internal/servers/plugin/v3/state.go +++ b/internal/servers/plugin/v3/state.go @@ -23,11 +23,11 @@ const keyColumn = "key" const valueColumn = "value" type ClientV3 struct { - client pbPlugin.PluginClient + client pbPlugin.PluginClient encodedTables [][]byte - mem map[string]string - keys []string - values []string + mem map[string]string + keys []string + values []string } func newStateClient(ctx context.Context, conn *grpc.ClientConn, spec pbPlugin.StateBackendSpec) (state.Client, error) { @@ -42,8 +42,8 @@ func newStateClient(ctx context.Context, conn *grpc.ClientConn, spec pbPlugin.St c := &ClientV3{ client: pbPlugin.NewPluginClient(conn), - mem: make(map[string]string), - keys: make([]string, 0), + mem: make(map[string]string), + keys: make([]string, 0), values: make([]string, 0), } name := spec.Name @@ -51,8 +51,8 @@ func newStateClient(ctx context.Context, conn *grpc.ClientConn, spec pbPlugin.St Name: stateTablePrefix + name, Columns: []schema.Column{ { - Name: keyColumn, - Type: arrow.BinaryTypes.String, + Name: keyColumn, + Type: arrow.BinaryTypes.String, PrimaryKey: true, }, { @@ -74,7 +74,7 @@ func newStateClient(ctx context.Context, conn *grpc.ClientConn, spec pbPlugin.St } if _, err := c.client.Migrate(ctx, &pbPlugin.Migrate_Request{ - Tables: c.encodedTables, + Tables: c.encodedTables, MigrateMode: pbPlugin.MIGRATE_MODE_SAFE, }); err != nil { return nil, err @@ -116,7 +116,6 @@ func newStateClient(ctx context.Context, conn *grpc.ClientConn, spec pbPlugin.St return c, nil } - func (c *ClientV3) SetKey(ctx context.Context, key string, value string) error { c.mem[key] = value return nil @@ -157,9 +156,9 @@ func (c *ClientV3) flush(ctx context.Context) error { return nil } -func (c *ClientV3) GetKey(ctx context.Context, key string) (string, error) { +func (c *ClientV3) GetKey(ctx context.Context, key string) (string, error) { if val, ok := c.mem[key]; ok { return val, nil } return "", fmt.Errorf("key not found") -} \ No newline at end of file +} diff --git a/plugin/memdb_test.go b/plugin/memdb_test.go index d89a70ac0d..2f9f54a506 100644 --- a/plugin/memdb_test.go +++ b/plugin/memdb_test.go @@ -12,19 +12,19 @@ import ( ) var migrateStrategyOverwrite = MigrateStrategy{ - AddColumn: pbPlugin.WriteSpec_FORCE, - AddColumnNotNull: pbPlugin.WriteSpec_FORCE, - RemoveColumn: pbPlugin.WriteSpec_FORCE, - RemoveColumnNotNull: pbPlugin.WriteSpec_FORCE, - ChangeColumn: pbPlugin.WriteSpec_FORCE, + AddColumn: MigrateModeForce, + AddColumnNotNull: MigrateModeForce, + RemoveColumn: MigrateModeForce, + RemoveColumnNotNull: MigrateModeForce, + ChangeColumn: MigrateModeForce, } var migrateStrategyAppend = MigrateStrategy{ - AddColumn: pbPlugin.WriteSpec_FORCE, - AddColumnNotNull: pbPlugin.WriteSpec_FORCE, - RemoveColumn: pbPlugin.WriteSpec_FORCE, - RemoveColumnNotNull: pbPlugin.WriteSpec_FORCE, - ChangeColumn: pbPlugin.WriteSpec_FORCE, + AddColumn: MigrateModeForce, + AddColumnNotNull: MigrateModeForce, + RemoveColumn: MigrateModeForce, + RemoveColumnNotNull: MigrateModeForce, + ChangeColumn: MigrateModeForce, } func TestPluginUnmanagedClient(t *testing.T) { @@ -33,7 +33,7 @@ func TestPluginUnmanagedClient(t *testing.T) { func() *Plugin { return NewPlugin("test", "development", NewMemDBClient) }, - pbPlugin.Spec{}, + nil, PluginTestSuiteTests{ MigrateStrategyOverwrite: migrateStrategyOverwrite, MigrateStrategyAppend: migrateStrategyAppend, @@ -46,7 +46,7 @@ func TestPluginManagedClient(t *testing.T) { func() *Plugin { return NewPlugin("test", "development", NewMemDBClient, WithManagedWriter()) }, - pbPlugin.Spec{}, + nil, PluginTestSuiteTests{ MigrateStrategyOverwrite: migrateStrategyOverwrite, MigrateStrategyAppend: migrateStrategyAppend, @@ -59,7 +59,7 @@ func TestPluginManagedClientWithSmallBatchSize(t *testing.T) { return NewPlugin("test", "development", NewMemDBClient, WithManagedWriter(), WithDefaultBatchSize(1), WithDefaultBatchSizeBytes(1)) - }, pbPlugin.Spec{}, + }, nil, PluginTestSuiteTests{ MigrateStrategyOverwrite: migrateStrategyOverwrite, MigrateStrategyAppend: migrateStrategyAppend, @@ -73,7 +73,7 @@ func TestPluginManagedClientWithLargeBatchSize(t *testing.T) { WithDefaultBatchSize(100000000), WithDefaultBatchSizeBytes(100000000)) }, - pbPlugin.Spec{}, + nil, PluginTestSuiteTests{ MigrateStrategyOverwrite: migrateStrategyOverwrite, MigrateStrategyAppend: migrateStrategyAppend, @@ -99,7 +99,7 @@ func TestPluginManagedClientWithCQPKs(t *testing.T) { func TestPluginOnNewError(t *testing.T) { ctx := context.Background() p := NewPlugin("test", "development", NewMemDBClientErrOnNew) - err := p.Init(ctx, pbPlugin.Spec{}) + err := p.Init(ctx, nil) if err == nil { t.Fatal("expected error") @@ -110,9 +110,7 @@ func TestOnWriteError(t *testing.T) { ctx := context.Background() newClientFunc := GetNewClient(WithErrOnWrite()) p := NewPlugin("test", "development", newClientFunc) - if err := p.Init(ctx, pbPlugin.Spec{ - WriteSpec: &pbPlugin.WriteSpec{}, - }); err != nil { + if err := p.Init(ctx, nil); err != nil { t.Fatal(err) } table := schema.TestTable("test", schema.TestSourceOptions{}) diff --git a/plugin/options.go b/plugin/options.go index 6ad134b843..11841fa53b 100644 --- a/plugin/options.go +++ b/plugin/options.go @@ -13,11 +13,11 @@ type MigrateMode int const ( MigrateModeSafe MigrateMode = iota - MigrateModeForced + MigrateModeForce ) var ( - migrateModeStrings = []string{"safe", "forced"} + migrateModeStrings = []string{"safe", "force"} ) func (m MigrateMode) String() string { @@ -49,7 +49,6 @@ func RegistryFromString(s string) (Registry, error) { } } - type WriteMode int const ( diff --git a/plugin/plugin.go b/plugin/plugin.go index aa456bdaac..ec5d00b1f1 100644 --- a/plugin/plugin.go +++ b/plugin/plugin.go @@ -7,8 +7,6 @@ import ( "time" "github.com/apache/arrow/go/v13/arrow" - "github.com/cloudquery/plugin-pb-go/specs/v0" - "github.com/cloudquery/plugin-sdk/v4/backend" "github.com/cloudquery/plugin-sdk/v4/caser" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/rs/zerolog" @@ -21,12 +19,6 @@ const ( defaultBatchSizeBytes = 5 * 1024 * 1024 // 5 MiB ) -type Options struct { - Backend backend.Backend -} - -type NewExecutionClientFunc func(context.Context, zerolog.Logger, specs.Source, Options) (schema.ClientMeta, error) - type NewClientFunc func(context.Context, zerolog.Logger, any) (Client, error) type ManagedSyncClient interface { @@ -40,7 +32,7 @@ type Client interface { WriteTableBatch(ctx context.Context, table *schema.Table, writeMode WriteMode, data []arrow.Record) error Write(ctx context.Context, tables schema.Tables, writeMode WriteMode, res <-chan arrow.Record) error DeleteStale(ctx context.Context, tables schema.Tables, sourceName string, syncTime time.Time) error - Read(ctx context.Context, table *schema.Table, sourceName string, res chan<- arrow.Record) error + // Read(ctx context.Context, table *schema.Table, sourceName string, res chan<- arrow.Record) error Close(ctx context.Context) error } @@ -106,8 +98,6 @@ type Plugin struct { client Client // sessionTables are the sessionTables schema.Tables - // backend is the backend used to store the cursor state - backend backend.Backend // spec is the spec the client was initialized with spec any // NoInternalColumns if set to true will not add internal columns to tables such as _cq_id and _cq_parent_id @@ -279,12 +269,5 @@ func (p *Plugin) Close(ctx context.Context) error { return fmt.Errorf("plugin already in use") } defer p.mu.Unlock() - if p.backend != nil { - err := p.backend.Close(ctx) - if err != nil { - return fmt.Errorf("failed to close backend: %w", err) - } - p.backend = nil - } return p.client.Close(ctx) } diff --git a/plugin/plugin_managed_source_test.go b/plugin/plugin_managed_source_test.go index 8520c3a7be..c2071cc977 100644 --- a/plugin/plugin_managed_source_test.go +++ b/plugin/plugin_managed_source_test.go @@ -9,7 +9,6 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/array" pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" - "github.com/cloudquery/plugin-sdk/v4/plugin" "github.com/cloudquery/plugin-sdk/v4/scalar" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/google/go-cmp/cmp" @@ -359,7 +358,7 @@ func (testRand) Read(p []byte) (n int, err error) { func TestManagedSync(t *testing.T) { uuid.SetRand(testRand{}) - for _, scheduler := range plugin.AllSchedulers { + for _, scheduler := range AllSchedulers { for _, tc := range syncTestCases { tc := tc tc.table = tc.table.Copy(nil) @@ -370,7 +369,7 @@ func TestManagedSync(t *testing.T) { } } -func testSyncTable(t *testing.T, tc syncTestCase, scheduler plugin.Scheduler, deterministicCQID bool) { +func testSyncTable(t *testing.T, tc syncTestCase, scheduler Scheduler, deterministicCQID bool) { ctx := context.Background() tables := []*schema.Table{ tc.table, diff --git a/plugin/plugin_managed_sync.go b/plugin/plugin_managed_sync.go index 2f52685513..8d4b113ebb 100644 --- a/plugin/plugin_managed_sync.go +++ b/plugin/plugin_managed_sync.go @@ -3,7 +3,6 @@ package plugin import ( "context" "fmt" - "time" "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/array" @@ -12,7 +11,7 @@ import ( "github.com/cloudquery/plugin-sdk/v4/schema" ) -func (p *Plugin) managedSync(ctx context.Context, sourceName string, syncTime time.Time, options SyncOptions, res chan<- arrow.Record) error { +func (p *Plugin) managedSync(ctx context.Context, options SyncOptions, res chan<- arrow.Record) error { if len(p.sessionTables) == 0 { return fmt.Errorf("no tables to sync - please check your spec 'tables' and 'skip_tables' settings") } @@ -42,4 +41,4 @@ func (p *Plugin) managedSync(ctx context.Context, sourceName string, syncTime ti res <- rec } return nil -} \ No newline at end of file +} diff --git a/plugin/plugin_reader.go b/plugin/plugin_reader.go index bb59759c0c..a01858eace 100644 --- a/plugin/plugin_reader.go +++ b/plugin/plugin_reader.go @@ -8,10 +8,23 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/cloudquery/plugin-sdk/v4/state" - "github.com/google/uuid" + "github.com/rs/zerolog" ) +type Operation int +const ( + OperationEqual Operation = iota + OperationNotEqual + OperationGreaterThan + OperationLessThan +) + +type WhereClause struct { + ColumnName string + Operation Operation + Value string +} type SyncOptions struct { Tables []string @@ -19,7 +32,39 @@ type SyncOptions struct { Concurrency int64 Scheduler Scheduler DeterministicCQID bool - StateBackend state.Client + // SyncTime if specified then this will be add to every table as _sync_time column + SyncTime time.Time + // If spceified then this will be added to every table as _source_name column + SourceName string + StateBackend state.Client +} + +type ReadOnlyClient interface { + NewManagedSyncClient(ctx context.Context, options SyncOptions) (ManagedSyncClient, error) + Sync(ctx context.Context, options SyncOptions, res chan<- arrow.Record) error + Read(ctx context.Context, table *schema.Table, sourceName string, res chan<- arrow.Record) error + Close(ctx context.Context) error +} + +type NewReadOnlyClientFunc func(context.Context, zerolog.Logger, any) (ReadOnlyClient, error) + +// NewReadOnlyPlugin returns a new CloudQuery Plugin with the given name, version and implementation. +// this plugin will only support read operations. For ReadWrite plugin use NewPlugin. +func NewReadOnlyPlugin(name string, version string, newClient NewReadOnlyClientFunc, options ...Option) *Plugin { + newClientWrapper := func(ctx context.Context, logger zerolog.Logger, any any) (Client, error) { + readOnlyClient, err := newClient(ctx, logger, any) + if err != nil { + return nil, err + } + wrapperClient := struct { + ReadOnlyClient + UnimplementedWriter + }{ + ReadOnlyClient: readOnlyClient, + } + return wrapperClient, nil + } + return NewPlugin(name, version, newClientWrapper, options...) } // Tables returns all tables supported by this source plugin @@ -35,35 +80,31 @@ func (p *Plugin) DynamicTables() schema.Tables { return p.sessionTables } -func (p *Plugin) readAll(ctx context.Context, table *schema.Table, sourceName string) ([]arrow.Record, error) { - var readErr error - ch := make(chan arrow.Record) - go func() { - defer close(ch) - readErr = p.Read(ctx, table, sourceName, ch) - }() - // nolint:prealloc - var resources []arrow.Record - for resource := range ch { - resources = append(resources, resource) - } - return resources, readErr -} - -func (p *Plugin) Read(ctx context.Context, table *schema.Table, sourceName string, res chan<- arrow.Record) error { - return p.client.Read(ctx, table, sourceName, res) -} +// func (p *Plugin) readAll(ctx context.Context, table *schema.Table, sourceName string) ([]arrow.Record, error) { +// var readErr error +// ch := make(chan arrow.Record) +// go func() { +// defer close(ch) +// readErr = p.Read(ctx, table, sourceName, ch) +// }() +// // nolint:prealloc +// var resources []arrow.Record +// for resource := range ch { +// resources = append(resources, resource) +// } +// return resources, readErr +// } -func (p *Plugin) Acknowledge(ctx context.Context, recordUUID uuid.UUID) error { - return nil -} +// func (p *Plugin) Read(ctx context.Context, table *schema.Table, sourceName string, res chan<- arrow.Record) error { +// return p.client.Read(ctx, table, sourceName, res) +// } -func (p *Plugin) syncAll(ctx context.Context, sourceName string, syncTime time.Time, options SyncOptions) ([]arrow.Record, error) { +func (p *Plugin) syncAll(ctx context.Context, options SyncOptions) ([]arrow.Record, error) { var err error ch := make(chan arrow.Record) go func() { defer close(ch) - err = p.Sync(ctx, sourceName, syncTime, options, ch) + err = p.Sync(ctx, options, ch) }() // nolint:prealloc var resources []arrow.Record @@ -74,12 +115,12 @@ func (p *Plugin) syncAll(ctx context.Context, sourceName string, syncTime time.T } // Sync is syncing data from the requested tables in spec to the given channel -func (p *Plugin) Sync(ctx context.Context, sourceName string, syncTime time.Time, options SyncOptions, res chan<- arrow.Record) error { +func (p *Plugin) Sync(ctx context.Context, options SyncOptions, res chan<- arrow.Record) error { if !p.mu.TryLock() { return fmt.Errorf("plugin already in use") } defer p.mu.Unlock() - p.syncTime = syncTime + p.syncTime = options.SyncTime startTime := time.Now() if p.unmanagedSync { @@ -87,7 +128,7 @@ func (p *Plugin) Sync(ctx context.Context, sourceName string, syncTime time.Time return fmt.Errorf("failed to sync unmanaged client: %w", err) } } else { - if err := p.managedSync(ctx, sourceName, syncTime, options, res); err != nil { + if err := p.managedSync(ctx, options, res); err != nil { return fmt.Errorf("failed to sync managed client: %w", err) } } diff --git a/plugin/plugin_test.go b/plugin/plugin_test.go index 04993d5b97..6fe3d0aa7e 100644 --- a/plugin/plugin_test.go +++ b/plugin/plugin_test.go @@ -30,7 +30,9 @@ func TestPluginUnmanagedSync(t *testing.T) { if err := p.writeAll(ctx, sourceName, syncTime, WriteModeOverwrite, testRecords); err != nil { t.Fatal(err) } - gotRecords, err := p.readAll(ctx, testTable, "test") + gotRecords, err := p.syncAll(ctx, SyncOptions{ + Tables: []string{testTable.Name}, + }) if err != nil { t.Fatal(err) } @@ -40,7 +42,9 @@ func TestPluginUnmanagedSync(t *testing.T) { if !array.RecordEqual(testRecords[0], gotRecords[0]) { t.Fatal("records are not equal") } - records, err := p.syncAll(ctx, sourceName, syncTime, SyncOptions{}) + records, err := p.syncAll(ctx, SyncOptions{ + Tables: []string{testTable.Name}, + }) if err != nil { t.Fatal(err) } @@ -56,7 +60,9 @@ func TestPluginUnmanagedSync(t *testing.T) { if err := p.DeleteStale(ctx, schema.Tables{testTable}, "test", newSyncTime); err != nil { t.Fatal(err) } - records, err = p.syncAll(ctx, sourceName, syncTime, SyncOptions{}) + records, err = p.syncAll(ctx, SyncOptions{ + Tables: []string{testTable.Name}, + }) if err != nil { t.Fatal(err) } diff --git a/plugin/state.go b/plugin/state.go deleted file mode 100644 index 6831e406a6..0000000000 --- a/plugin/state.go +++ /dev/null @@ -1 +0,0 @@ -package plugin \ No newline at end of file diff --git a/plugin/testing_overwrite_deletestale.go b/plugin/testing_overwrite_deletestale.go index 300900e287..6ac079ff13 100644 --- a/plugin/testing_overwrite_deletestale.go +++ b/plugin/testing_overwrite_deletestale.go @@ -47,7 +47,11 @@ func (*PluginTestSuite) destinationPluginTestWriteOverwriteDeleteStale(ctx conte } sortRecordsBySyncTime(table, resources) - resourcesRead, err := p.readAll(ctx, table, sourceName) + resourcesRead, err := p.syncAll(ctx, SyncOptions{ + Tables: []string{tableName}, + SyncTime: syncTime, + SourceName: sourceName, + }) if err != nil { return fmt.Errorf("failed to read all: %w", err) } @@ -71,7 +75,11 @@ func (*PluginTestSuite) destinationPluginTestWriteOverwriteDeleteStale(ctx conte } // read from incremental table - resourcesRead, err = p.readAll(ctx, incTable, sourceName) + resourcesRead, err = p.syncAll(ctx, SyncOptions{ + Tables: []string{tableName}, + SyncTime: syncTime, + SourceName: sourceName, + }) if err != nil { return fmt.Errorf("failed to read all: %w", err) } @@ -99,7 +107,11 @@ func (*PluginTestSuite) destinationPluginTestWriteOverwriteDeleteStale(ctx conte return fmt.Errorf("failed to write all second time: %w", err) } - resourcesRead, err = p.readAll(ctx, table, sourceName) + resourcesRead, err = p.syncAll(ctx, SyncOptions{ + Tables: []string{tableName}, + SyncTime: secondSyncTime, + SourceName: sourceName, + }) if err != nil { return fmt.Errorf("failed to read all second time: %w", err) } @@ -116,7 +128,11 @@ func (*PluginTestSuite) destinationPluginTestWriteOverwriteDeleteStale(ctx conte return fmt.Errorf("after overwrite expected first resource to be different. diff: %s", diff) } - resourcesRead, err = p.readAll(ctx, table, sourceName) + resourcesRead, err = p.syncAll(ctx, SyncOptions{ + Tables: []string{tableName}, + SyncTime: syncTime, + SourceName: sourceName, + }) if err != nil { return fmt.Errorf("failed to read all second time: %w", err) } @@ -136,7 +152,11 @@ func (*PluginTestSuite) destinationPluginTestWriteOverwriteDeleteStale(ctx conte // we expect the incremental table to still have 3 resources, because delete-stale should // not apply there - resourcesRead, err = p.readAll(ctx, incTable, sourceName) + resourcesRead, err = p.syncAll(ctx, SyncOptions{ + Tables: []string{incTable.Name}, + SyncTime: secondSyncTime, + SourceName: sourceName, + }) if err != nil { return fmt.Errorf("failed to read all from incremental table: %w", err) } diff --git a/plugin/testing_sync.go b/plugin/testing_sync.go index 59b1cd5a9e..01a09c98b6 100644 --- a/plugin/testing_sync.go +++ b/plugin/testing_sync.go @@ -5,7 +5,6 @@ import ( "fmt" "strings" "testing" - "time" "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/array" @@ -37,7 +36,7 @@ func TestPluginSync(t *testing.T, plugin *Plugin, sourceName string, spec any, o go func() { defer close(resourcesChannel) - syncErr = plugin.Sync(context.Background(), sourceName, time.Now(), options, resourcesChannel) + syncErr = plugin.Sync(context.Background(), options, resourcesChannel) }() syncedResources := make([]arrow.Record, 0) diff --git a/plugin/testing_write.go b/plugin/testing_write.go index 501ff39273..e7e50ef76f 100644 --- a/plugin/testing_write.go +++ b/plugin/testing_write.go @@ -227,7 +227,7 @@ func PluginTestSuiteRunner(t *testing.T, newPlugin NewPluginFunc, spec any, test if suite.tests.SkipMigrateOverwriteForce { t.Skip("skipping " + t.Name()) } - migrateMode := MigrateModeForced + migrateMode := MigrateModeForce writeMode := WriteModeOverwrite suite.destinationPluginTestMigrate(ctx, t, newPlugin, logger, migrateMode, writeMode, tests.MigrateStrategyOverwrite, opts) }) @@ -263,7 +263,7 @@ func PluginTestSuiteRunner(t *testing.T, newPlugin NewPluginFunc, spec any, test if suite.tests.SkipMigrateAppendForce { t.Skip("skipping " + t.Name()) } - migrateMode := MigrateModeForced + migrateMode := MigrateModeForce writeMode := WriteModeAppend suite.destinationPluginTestMigrate(ctx, t, newPlugin, logger, migrateMode, writeMode, tests.MigrateStrategyAppend, opts) }) diff --git a/plugin/testing_write_append.go b/plugin/testing_write_append.go index 68efc4ca8b..d4ccdd15d4 100644 --- a/plugin/testing_write_append.go +++ b/plugin/testing_write_append.go @@ -50,7 +50,11 @@ func (s *PluginTestSuite) destinationPluginTestWriteAppend(ctx context.Context, } } - resourcesRead, err := p.readAll(ctx, tables[0], sourceName) + resourcesRead, err := p.syncAll(ctx, SyncOptions{ + Tables: []string{tableName}, + SyncTime: secondSyncTime, + SourceName: sourceName, + }) if err != nil { return fmt.Errorf("failed to read all second time: %w", err) } diff --git a/plugin/testing_write_migrate.go b/plugin/testing_write_migrate.go index d59da7fc8b..978c5951a2 100644 --- a/plugin/testing_write_migrate.go +++ b/plugin/testing_write_migrate.go @@ -56,7 +56,10 @@ func testMigration(ctx context.Context, _ *testing.T, p *Plugin, logger zerolog. stripNullsFromLists(resource2) } - resourcesRead, err := p.readAll(ctx, target, sourceName) + resourcesRead, err := p.syncAll(ctx, SyncOptions{ + Tables: []string{target.Name}, + SourceName: sourceName, + }) if err != nil { return fmt.Errorf("failed to read all: %w", err) } @@ -93,7 +96,7 @@ func (*PluginTestSuite) destinationPluginTestMigrate( testOpts PluginTestSuiteRunnerOptions, ) { t.Run("add_column", func(t *testing.T) { - if strategy.AddColumn == MigrateModeForced && migrateMode == MigrateModeSafe { + if strategy.AddColumn == MigrateModeForce && migrateMode == MigrateModeSafe { t.Skip("skipping as migrate mode is safe") return } @@ -129,7 +132,7 @@ func (*PluginTestSuite) destinationPluginTestMigrate( }) t.Run("add_column_not_null", func(t *testing.T) { - if strategy.AddColumnNotNull == MigrateModeForced && migrateMode == MigrateModeSafe { + if strategy.AddColumnNotNull == MigrateModeForce && migrateMode == MigrateModeSafe { t.Skip("skipping as migrate mode is safe") return } @@ -163,7 +166,7 @@ func (*PluginTestSuite) destinationPluginTestMigrate( }) t.Run("remove_column", func(t *testing.T) { - if strategy.RemoveColumn == MigrateModeForced && migrateMode == MigrateModeSafe { + if strategy.RemoveColumn == MigrateModeForce && migrateMode == MigrateModeSafe { t.Skip("skipping as migrate mode is safe") return } @@ -196,7 +199,7 @@ func (*PluginTestSuite) destinationPluginTestMigrate( }) t.Run("remove_column_not_null", func(t *testing.T) { - if strategy.RemoveColumnNotNull == MigrateModeForced && migrateMode == MigrateModeSafe { + if strategy.RemoveColumnNotNull == MigrateModeForce && migrateMode == MigrateModeSafe { t.Skip("skipping as migrate mode is safe") return } @@ -230,7 +233,7 @@ func (*PluginTestSuite) destinationPluginTestMigrate( }) t.Run("change_column", func(t *testing.T) { - if strategy.ChangeColumn == MigrateModeForced && migrateMode == MigrateModeSafe { + if strategy.ChangeColumn == MigrateModeForce && migrateMode == MigrateModeSafe { t.Skip("skipping as migrate mode is safe") return } diff --git a/plugin/testing_write_overwrite.go b/plugin/testing_write_overwrite.go index 34e89e8b2d..fd851a6e2e 100644 --- a/plugin/testing_write_overwrite.go +++ b/plugin/testing_write_overwrite.go @@ -43,7 +43,11 @@ func (*PluginTestSuite) destinationPluginTestWriteOverwrite(ctx context.Context, if testOpts.IgnoreNullsInLists { stripNullsFromLists(resources) } - resourcesRead, err := p.readAll(ctx, table, sourceName) + resourcesRead, err := p.syncAll(ctx, SyncOptions{ + Tables: []string{tableName}, + SyncTime: syncTime, + SourceName: sourceName, + }) if err != nil { return fmt.Errorf("failed to read all: %w", err) } @@ -85,7 +89,11 @@ func (*PluginTestSuite) destinationPluginTestWriteOverwrite(ctx context.Context, if testOpts.IgnoreNullsInLists { stripNullsFromLists(updatedResource) } - resourcesRead, err = p.readAll(ctx, table, sourceName) + resourcesRead, err = p.syncAll(ctx, SyncOptions{ + Tables: []string{tableName}, + SyncTime: secondSyncTime, + SourceName: sourceName, + }) if err != nil { return fmt.Errorf("failed to read all second time: %w", err) } diff --git a/serve/destination_v0_test.go b/serve/destination_v0_test.go index a1ba81f794..181474c3c4 100644 --- a/serve/destination_v0_test.go +++ b/serve/destination_v0_test.go @@ -130,7 +130,9 @@ func TestDestination(t *testing.T) { // serversDestination table := serversDestination.TableV2ToV3(tableV2) readCh := make(chan arrow.Record, 1) - if err := p.Read(ctx, table, sourceName, readCh); err != nil { + if err := p.Sync(ctx, plugin.SyncOptions{ + Tables: []string{tableName}, + }, readCh); err != nil { t.Fatal(err) } close(readCh) diff --git a/serve/destination_v1_test.go b/serve/destination_v1_test.go index c65320eea1..3f15930022 100644 --- a/serve/destination_v1_test.go +++ b/serve/destination_v1_test.go @@ -130,7 +130,9 @@ func TestDestinationV1(t *testing.T) { } // serversDestination readCh := make(chan arrow.Record, 1) - if err := p.Read(ctx, table, sourceName, readCh); err != nil { + if err := p.Sync(ctx, plugin.SyncOptions{ + Tables: []string{tableName}, + }, readCh); err != nil { t.Fatal(err) } close(readCh) diff --git a/serve/plugin.go b/serve/plugin.go index 483312b871..f64d0ba1ec 100644 --- a/serve/plugin.go +++ b/serve/plugin.go @@ -159,8 +159,8 @@ func (s *PluginServe) newCmdPluginServe() *cobra.Command { ) s.plugin.SetLogger(logger) pbv3.RegisterPluginServer(grpcServer, &serversv3.Server{ - Plugin: s.plugin, - Logger: logger, + Plugin: s.plugin, + Logger: logger, NoSentry: noSentry, }) if s.destinationV0V1Server { @@ -177,7 +177,7 @@ func (s *PluginServe) newCmdPluginServe() *cobra.Command { Versions: []string{"v0", "v1", "v2", "v3"}, }) pbdiscoveryv1.RegisterDiscoveryServer(grpcServer, &discoveryServerV1.Server{ - Versions: []uint64{0,1,2,3}, + Versions: []uint64{0, 1, 2, 3}, }) version := s.plugin.Version() diff --git a/serve/state_v3_test.go b/serve/state_v3_test.go index d1442c2e88..f75d53353b 100644 --- a/serve/state_v3_test.go +++ b/serve/state_v3_test.go @@ -54,4 +54,4 @@ func TestStateV3(t *testing.T) { if serverErr != nil { t.Fatal(serverErr) } -} \ No newline at end of file +} diff --git a/state/state.go b/state/state.go index e92423df3c..55f070704e 100644 --- a/state/state.go +++ b/state/state.go @@ -5,4 +5,4 @@ import "context" type Client interface { SetKey(ctx context.Context, key string, value string) error GetKey(ctx context.Context, key string) (string, error) -} \ No newline at end of file +} From 645842c39c208fb51c7af26da2f434c7f7783359 Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Sun, 4 Jun 2023 22:31:07 +0300 Subject: [PATCH 010/125] more work --- plugin/plugin_reader.go | 34 ---------------------------------- 1 file changed, 34 deletions(-) diff --git a/plugin/plugin_reader.go b/plugin/plugin_reader.go index a01858eace..d02ca6bc75 100644 --- a/plugin/plugin_reader.go +++ b/plugin/plugin_reader.go @@ -11,21 +11,6 @@ import ( "github.com/rs/zerolog" ) -type Operation int - -const ( - OperationEqual Operation = iota - OperationNotEqual - OperationGreaterThan - OperationLessThan -) - -type WhereClause struct { - ColumnName string - Operation Operation - Value string -} - type SyncOptions struct { Tables []string SkipTables []string @@ -80,25 +65,6 @@ func (p *Plugin) DynamicTables() schema.Tables { return p.sessionTables } -// func (p *Plugin) readAll(ctx context.Context, table *schema.Table, sourceName string) ([]arrow.Record, error) { -// var readErr error -// ch := make(chan arrow.Record) -// go func() { -// defer close(ch) -// readErr = p.Read(ctx, table, sourceName, ch) -// }() -// // nolint:prealloc -// var resources []arrow.Record -// for resource := range ch { -// resources = append(resources, resource) -// } -// return resources, readErr -// } - -// func (p *Plugin) Read(ctx context.Context, table *schema.Table, sourceName string, res chan<- arrow.Record) error { -// return p.client.Read(ctx, table, sourceName, res) -// } - func (p *Plugin) syncAll(ctx context.Context, options SyncOptions) ([]arrow.Record, error) { var err error ch := make(chan arrow.Record) From e34b7f75a6efcdcf79b2eb2ea9a4418b1b91c230 Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Mon, 5 Jun 2023 18:17:16 +0300 Subject: [PATCH 011/125] more wip --- internal/servers/plugin/v3/plugin.go | 24 -- plugin/options.go | 6 - plugin/plugin.go | 2 - plugin/plugin_managed_sync.go | 44 --- plugin/plugin_reader.go | 11 +- plugin/scheduler.go | 163 ----------- scheduler/metrics.go | 125 ++++++++ scheduler/metrics_test.go | 37 +++ .../plugin_managed_source_test.go.backup | 2 +- scheduler/scheduler.go | 275 ++++++++++++++++++ {plugin => scheduler}/scheduler_dfs.go | 86 +++--- .../scheduler_round_robin.go | 48 +-- .../scheduler_round_robin_test.go | 9 +- 13 files changed, 515 insertions(+), 317 deletions(-) delete mode 100644 plugin/plugin_managed_sync.go delete mode 100644 plugin/scheduler.go create mode 100644 scheduler/metrics.go create mode 100644 scheduler/metrics_test.go rename plugin/plugin_managed_source_test.go => scheduler/plugin_managed_source_test.go.backup (99%) create mode 100644 scheduler/scheduler.go rename {plugin => scheduler}/scheduler_dfs.go (69%) rename {plugin => scheduler}/scheduler_round_robin.go (64%) rename {plugin => scheduler}/scheduler_round_robin_test.go (94%) diff --git a/internal/servers/plugin/v3/plugin.go b/internal/servers/plugin/v3/plugin.go index 53b02b1cd5..fa432af917 100644 --- a/internal/servers/plugin/v3/plugin.go +++ b/internal/servers/plugin/v3/plugin.go @@ -3,7 +3,6 @@ package plugin import ( "bytes" "context" - "encoding/json" "errors" "fmt" "io" @@ -163,29 +162,6 @@ func (s *Server) Sync(req *pb.Sync_Request, stream pb.Plugin_SyncServer) error { return syncErr } -func (s *Server) GetMetrics(context.Context, *pb.GetMetrics_Request) (*pb.GetMetrics_Response, error) { - // Aggregate metrics before sending to keep response size small. - // Temporary fix for https://github.com/cloudquery/cloudquery/issues/3962 - m := s.Plugin.Metrics() - agg := &plugin.TableClientMetrics{} - for _, table := range m.TableClient { - for _, tableClient := range table { - agg.Resources += tableClient.Resources - agg.Errors += tableClient.Errors - agg.Panics += tableClient.Panics - } - } - b, err := json.Marshal(&plugin.Metrics{ - TableClient: map[string]map[string]*plugin.TableClientMetrics{"": {"": agg}}, - }) - if err != nil { - return nil, fmt.Errorf("failed to marshal source metrics: %w", err) - } - return &pb.GetMetrics_Response{ - Metrics: b, - }, nil -} - func (s *Server) Migrate(ctx context.Context, req *pb.Migrate_Request) (*pb.Migrate_Response, error) { schemas, err := schema.NewSchemasFromBytes(req.Tables) if err != nil { diff --git a/plugin/options.go b/plugin/options.go index 11841fa53b..66a13b69f7 100644 --- a/plugin/options.go +++ b/plugin/options.go @@ -113,12 +113,6 @@ func WithNoInternalColumns() Option { } } -func WithUnmanagedSync() Option { - return func(p *Plugin) { - p.unmanagedSync = true - } -} - // WithTitleTransformer allows the plugin to control how table names get turned into titles for the // generated documentation. func WithTitleTransformer(t func(*schema.Table) string) Option { diff --git a/plugin/plugin.go b/plugin/plugin.go index ec5d00b1f1..44a8731fec 100644 --- a/plugin/plugin.go +++ b/plugin/plugin.go @@ -26,13 +26,11 @@ type ManagedSyncClient interface { } type Client interface { - NewManagedSyncClient(ctx context.Context, options SyncOptions) (ManagedSyncClient, error) Sync(ctx context.Context, options SyncOptions, res chan<- arrow.Record) error Migrate(ctx context.Context, tables schema.Tables, migrateMode MigrateMode) error WriteTableBatch(ctx context.Context, table *schema.Table, writeMode WriteMode, data []arrow.Record) error Write(ctx context.Context, tables schema.Tables, writeMode WriteMode, res <-chan arrow.Record) error DeleteStale(ctx context.Context, tables schema.Tables, sourceName string, syncTime time.Time) error - // Read(ctx context.Context, table *schema.Table, sourceName string, res chan<- arrow.Record) error Close(ctx context.Context) error } diff --git a/plugin/plugin_managed_sync.go b/plugin/plugin_managed_sync.go deleted file mode 100644 index 8d4b113ebb..0000000000 --- a/plugin/plugin_managed_sync.go +++ /dev/null @@ -1,44 +0,0 @@ -package plugin - -import ( - "context" - "fmt" - - "github.com/apache/arrow/go/v13/arrow" - "github.com/apache/arrow/go/v13/arrow/array" - "github.com/apache/arrow/go/v13/arrow/memory" - "github.com/cloudquery/plugin-sdk/v4/scalar" - "github.com/cloudquery/plugin-sdk/v4/schema" -) - -func (p *Plugin) managedSync(ctx context.Context, options SyncOptions, res chan<- arrow.Record) error { - if len(p.sessionTables) == 0 { - return fmt.Errorf("no tables to sync - please check your spec 'tables' and 'skip_tables' settings") - } - - managedClient, err := p.client.NewManagedSyncClient(ctx, options) - if err != nil { - return fmt.Errorf("failed to create managed sync client: %w", err) - } - - resources := make(chan *schema.Resource) - go func() { - defer close(resources) - switch options.Scheduler { - case SchedulerDFS: - p.syncDfs(ctx, options, managedClient, p.sessionTables, resources) - case SchedulerRoundRobin: - p.syncRoundRobin(ctx, options, managedClient, p.sessionTables, resources) - default: - panic(fmt.Errorf("unknown scheduler %s", options.Scheduler)) - } - }() - for resource := range resources { - vector := resource.GetValues() - bldr := array.NewRecordBuilder(memory.DefaultAllocator, resource.Table.ToArrowSchema()) - scalar.AppendToRecordBuilder(bldr, vector) - rec := bldr.NewRecord() - res <- rec - } - return nil -} diff --git a/plugin/plugin_reader.go b/plugin/plugin_reader.go index d02ca6bc75..e040976da0 100644 --- a/plugin/plugin_reader.go +++ b/plugin/plugin_reader.go @@ -25,7 +25,6 @@ type SyncOptions struct { } type ReadOnlyClient interface { - NewManagedSyncClient(ctx context.Context, options SyncOptions) (ManagedSyncClient, error) Sync(ctx context.Context, options SyncOptions, res chan<- arrow.Record) error Read(ctx context.Context, table *schema.Table, sourceName string, res chan<- arrow.Record) error Close(ctx context.Context) error @@ -89,14 +88,8 @@ func (p *Plugin) Sync(ctx context.Context, options SyncOptions, res chan<- arrow p.syncTime = options.SyncTime startTime := time.Now() - if p.unmanagedSync { - if err := p.client.Sync(ctx, options, res); err != nil { - return fmt.Errorf("failed to sync unmanaged client: %w", err) - } - } else { - if err := p.managedSync(ctx, options, res); err != nil { - return fmt.Errorf("failed to sync managed client: %w", err) - } + if err := p.client.Sync(ctx, options, res); err != nil { + return fmt.Errorf("failed to sync unmanaged client: %w", err) } p.logger.Info().Uint64("resources", p.metrics.TotalResources()).Uint64("errors", p.metrics.TotalErrors()).Uint64("panics", p.metrics.TotalPanics()).TimeDiff("duration", time.Now(), startTime).Msg("sync finished") diff --git a/plugin/scheduler.go b/plugin/scheduler.go deleted file mode 100644 index c00ed2c8a9..0000000000 --- a/plugin/scheduler.go +++ /dev/null @@ -1,163 +0,0 @@ -package plugin - -import ( - "context" - "errors" - "fmt" - "runtime/debug" - "sync" - "sync/atomic" - "time" - - "github.com/cloudquery/plugin-sdk/v4/schema" - "github.com/getsentry/sentry-go" - "github.com/rs/zerolog" - "github.com/thoas/go-funk" -) - -const ( - minTableConcurrency = 1 - minResourceConcurrency = 100 -) - -const periodicMetricLoggerInterval = 30 * time.Second - -func (p *Plugin) logTablesMetrics(tables schema.Tables, client schema.ClientMeta) { - clientName := client.ID() - for _, table := range tables { - metrics := p.metrics.TableClient[table.Name][clientName] - p.logger.Info().Str("table", table.Name).Str("client", clientName).Uint64("resources", metrics.Resources).Uint64("errors", metrics.Errors).Msg("table sync finished") - p.logTablesMetrics(table.Relations, client) - } -} - -func (p *Plugin) resolveResource(ctx context.Context, table *schema.Table, client schema.ClientMeta, parent *schema.Resource, item any) *schema.Resource { - var validationErr *schema.ValidationError - ctx, cancel := context.WithTimeout(ctx, 10*time.Minute) - defer cancel() - resource := schema.NewResourceData(table, parent, item) - objectStartTime := time.Now() - clientID := client.ID() - tableMetrics := p.metrics.TableClient[table.Name][clientID] - logger := p.logger.With().Str("table", table.Name).Str("client", clientID).Logger() - defer func() { - if err := recover(); err != nil { - stack := fmt.Sprintf("%s\n%s", err, string(debug.Stack())) - logger.Error().Interface("error", err).TimeDiff("duration", time.Now(), objectStartTime).Str("stack", stack).Msg("resource resolver finished with panic") - atomic.AddUint64(&tableMetrics.Panics, 1) - sentry.WithScope(func(scope *sentry.Scope) { - scope.SetTag("table", table.Name) - sentry.CurrentHub().CaptureMessage(stack) - }) - } - }() - if table.PreResourceResolver != nil { - if err := table.PreResourceResolver(ctx, client, resource); err != nil { - logger.Error().Err(err).Msg("pre resource resolver failed") - atomic.AddUint64(&tableMetrics.Errors, 1) - if errors.As(err, &validationErr) { - sentry.WithScope(func(scope *sentry.Scope) { - scope.SetTag("table", table.Name) - sentry.CurrentHub().CaptureMessage(validationErr.MaskedError()) - }) - } - return nil - } - } - - for _, c := range table.Columns { - p.resolveColumn(ctx, logger, tableMetrics, client, resource, c) - } - - if table.PostResourceResolver != nil { - if err := table.PostResourceResolver(ctx, client, resource); err != nil { - logger.Error().Stack().Err(err).Msg("post resource resolver finished with error") - atomic.AddUint64(&tableMetrics.Errors, 1) - if errors.As(err, &validationErr) { - sentry.WithScope(func(scope *sentry.Scope) { - scope.SetTag("table", table.Name) - sentry.CurrentHub().CaptureMessage(validationErr.MaskedError()) - }) - } - } - } - atomic.AddUint64(&tableMetrics.Resources, 1) - return resource -} - -func (p *Plugin) resolveColumn(ctx context.Context, logger zerolog.Logger, tableMetrics *TableClientMetrics, client schema.ClientMeta, resource *schema.Resource, c schema.Column) { - var validationErr *schema.ValidationError - columnStartTime := time.Now() - defer func() { - if err := recover(); err != nil { - stack := fmt.Sprintf("%s\n%s", err, string(debug.Stack())) - logger.Error().Str("column", c.Name).Interface("error", err).TimeDiff("duration", time.Now(), columnStartTime).Str("stack", stack).Msg("column resolver finished with panic") - atomic.AddUint64(&tableMetrics.Panics, 1) - sentry.WithScope(func(scope *sentry.Scope) { - scope.SetTag("table", resource.Table.Name) - scope.SetTag("column", c.Name) - sentry.CurrentHub().CaptureMessage(stack) - }) - } - }() - - if c.Resolver != nil { - if err := c.Resolver(ctx, client, resource, c); err != nil { - logger.Error().Err(err).Msg("column resolver finished with error") - atomic.AddUint64(&tableMetrics.Errors, 1) - if errors.As(err, &validationErr) { - sentry.WithScope(func(scope *sentry.Scope) { - scope.SetTag("table", resource.Table.Name) - scope.SetTag("column", c.Name) - sentry.CurrentHub().CaptureMessage(validationErr.MaskedError()) - }) - } - } - } else { - // base use case: try to get column with CamelCase name - v := funk.Get(resource.GetItem(), p.caser.ToPascal(c.Name), funk.WithAllowZero()) - if v != nil { - err := resource.Set(c.Name, v) - if err != nil { - logger.Error().Err(err).Msg("column resolver finished with error") - atomic.AddUint64(&tableMetrics.Errors, 1) - if errors.As(err, &validationErr) { - sentry.WithScope(func(scope *sentry.Scope) { - scope.SetTag("table", resource.Table.Name) - scope.SetTag("column", c.Name) - sentry.CurrentHub().CaptureMessage(validationErr.MaskedError()) - }) - } - } - } - } -} - -func (p *Plugin) periodicMetricLogger(ctx context.Context, wg *sync.WaitGroup) { - defer wg.Done() - - ticker := time.NewTicker(periodicMetricLoggerInterval) - defer ticker.Stop() - - for { - select { - case <-ctx.Done(): - return - case <-ticker.C: - p.logger.Info(). - Uint64("total_resources", p.metrics.TotalResourcesAtomic()). - Uint64("total_errors", p.metrics.TotalErrorsAtomic()). - Uint64("total_panics", p.metrics.TotalPanicsAtomic()). - Msg("Sync in progress") - } - } -} - -// unparam's suggestion to remove the second parameter is not good advice here. -// nolint:unparam -func max(a, b uint64) uint64 { - if a > b { - return a - } - return b -} diff --git a/scheduler/metrics.go b/scheduler/metrics.go new file mode 100644 index 0000000000..372965ba93 --- /dev/null +++ b/scheduler/metrics.go @@ -0,0 +1,125 @@ +package scheduler + +import ( + "sync/atomic" + "time" + + "github.com/cloudquery/plugin-sdk/v4/schema" +) + +type Metrics struct { + TableClient map[string]map[string]*TableClientMetrics +} + +type TableClientMetrics struct { + Resources uint64 + Errors uint64 + Panics uint64 + StartTime time.Time + EndTime time.Time +} + +func (s *TableClientMetrics) Equal(other *TableClientMetrics) bool { + return s.Resources == other.Resources && s.Errors == other.Errors && s.Panics == other.Panics +} + +// Equal compares to stats. Mostly useful in testing +func (s *Metrics) Equal(other *Metrics) bool { + for table, clientStats := range s.TableClient { + for client, stats := range clientStats { + if _, ok := other.TableClient[table]; !ok { + return false + } + if _, ok := other.TableClient[table][client]; !ok { + return false + } + if !stats.Equal(other.TableClient[table][client]) { + return false + } + } + } + for table, clientStats := range other.TableClient { + for client, stats := range clientStats { + if _, ok := s.TableClient[table]; !ok { + return false + } + if _, ok := s.TableClient[table][client]; !ok { + return false + } + if !stats.Equal(s.TableClient[table][client]) { + return false + } + } + } + return true +} + +func (s *Metrics) initWithClients(table *schema.Table, clients []schema.ClientMeta) { + s.TableClient[table.Name] = make(map[string]*TableClientMetrics, len(clients)) + for _, client := range clients { + s.TableClient[table.Name][client.ID()] = &TableClientMetrics{} + } + for _, relation := range table.Relations { + s.initWithClients(relation, clients) + } +} + +func (s *Metrics) TotalErrors() uint64 { + var total uint64 + for _, clientMetrics := range s.TableClient { + for _, metrics := range clientMetrics { + total += metrics.Errors + } + } + return total +} + +func (s *Metrics) TotalErrorsAtomic() uint64 { + var total uint64 + for _, clientMetrics := range s.TableClient { + for _, metrics := range clientMetrics { + total += atomic.LoadUint64(&metrics.Errors) + } + } + return total +} + +func (s *Metrics) TotalPanics() uint64 { + var total uint64 + for _, clientMetrics := range s.TableClient { + for _, metrics := range clientMetrics { + total += metrics.Panics + } + } + return total +} + +func (s *Metrics) TotalPanicsAtomic() uint64 { + var total uint64 + for _, clientMetrics := range s.TableClient { + for _, metrics := range clientMetrics { + total += atomic.LoadUint64(&metrics.Panics) + } + } + return total +} + +func (s *Metrics) TotalResources() uint64 { + var total uint64 + for _, clientMetrics := range s.TableClient { + for _, metrics := range clientMetrics { + total += metrics.Resources + } + } + return total +} + +func (s *Metrics) TotalResourcesAtomic() uint64 { + var total uint64 + for _, clientMetrics := range s.TableClient { + for _, metrics := range clientMetrics { + total += atomic.LoadUint64(&metrics.Resources) + } + } + return total +} diff --git a/scheduler/metrics_test.go b/scheduler/metrics_test.go new file mode 100644 index 0000000000..1bc11daa58 --- /dev/null +++ b/scheduler/metrics_test.go @@ -0,0 +1,37 @@ +package scheduler + +import "testing" + +func TestMetrics(t *testing.T) { + s := &Metrics{ + TableClient: make(map[string]map[string]*TableClientMetrics), + } + s.TableClient["test_table"] = make(map[string]*TableClientMetrics) + s.TableClient["test_table"]["testExecutionClient"] = &TableClientMetrics{ + Resources: 1, + Errors: 2, + Panics: 3, + } + if s.TotalResources() != 1 { + t.Fatal("expected 1 resource") + } + if s.TotalErrors() != 2 { + t.Fatal("expected 2 error") + } + if s.TotalPanics() != 3 { + t.Fatal("expected 3 panics") + } + + other := &Metrics{ + TableClient: make(map[string]map[string]*TableClientMetrics), + } + other.TableClient["test_table"] = make(map[string]*TableClientMetrics) + other.TableClient["test_table"]["testExecutionClient"] = &TableClientMetrics{ + Resources: 1, + Errors: 2, + Panics: 3, + } + if !s.Equal(other) { + t.Fatal("expected metrics to be equal") + } +} diff --git a/plugin/plugin_managed_source_test.go b/scheduler/plugin_managed_source_test.go.backup similarity index 99% rename from plugin/plugin_managed_source_test.go rename to scheduler/plugin_managed_source_test.go.backup index c2071cc977..e0a006a4ca 100644 --- a/plugin/plugin_managed_source_test.go +++ b/scheduler/plugin_managed_source_test.go.backup @@ -1,4 +1,4 @@ -package plugin +package scheduler import ( "context" diff --git a/scheduler/scheduler.go b/scheduler/scheduler.go new file mode 100644 index 0000000000..228582ef7a --- /dev/null +++ b/scheduler/scheduler.go @@ -0,0 +1,275 @@ +package scheduler + +import ( + "bytes" + "context" + "errors" + "fmt" + "runtime/debug" + "sync/atomic" + "time" + + "github.com/apache/arrow/go/v13/arrow" + "github.com/apache/arrow/go/v13/arrow/array" + "github.com/apache/arrow/go/v13/arrow/memory" + "github.com/cloudquery/plugin-sdk/v4/caser" + "github.com/cloudquery/plugin-sdk/v4/scalar" + "github.com/cloudquery/plugin-sdk/v4/schema" + "github.com/getsentry/sentry-go" + "github.com/rs/zerolog" + "github.com/thoas/go-funk" + "golang.org/x/sync/semaphore" +) + +const ( + minTableConcurrency = 1 + minResourceConcurrency = 100 + defaultConcurrency = 200000 +) + +type SchedulerStrategy int + +const ( + SchedulerDFS SchedulerStrategy = iota + SchedulerRoundRobin +) + +var AllSchedulers = Schedulers{SchedulerDFS, SchedulerRoundRobin} +var AllSchedulerNames = [...]string{ + SchedulerDFS: "dfs", + SchedulerRoundRobin: "round-robin", +} + +type Schedulers []SchedulerStrategy + +func (s Schedulers) String() string { + var buffer bytes.Buffer + for i, scheduler := range s { + if i > 0 { + buffer.WriteString(", ") + } + buffer.WriteString(scheduler.String()) + } + return buffer.String() +} + +func (s SchedulerStrategy) String() string { + return AllSchedulerNames[s] +} + +const periodicMetricLoggerInterval = 30 * time.Second + +type Option func(*Scheduler) + +func WithLogger(logger zerolog.Logger) Option { + return func(s *Scheduler) { + s.logger = logger + } +} + +func WithDeterministicCQId(deterministicCQId bool) Option { + return func(s *Scheduler) { + s.deterministicCQId = deterministicCQId + } +} + +func WithConcurrency(concurrency uint64) Option { + return func(s *Scheduler) { + s.concurrency = concurrency + } +} + +type Scheduler struct { + tables schema.Tables + client schema.ClientMeta + caser *caser.Caser + strategy SchedulerStrategy + // status sync metrics + metrics *Metrics + maxDepth uint64 + // resourceSem is a semaphore that limits the number of concurrent resources being fetched + resourceSem *semaphore.Weighted + // tableSem is a semaphore that limits the number of concurrent tables being fetched + tableSems []*semaphore.Weighted + // Logger to call, this logger is passed to the serve.Serve Client, if not defined Serve will create one instead. + logger zerolog.Logger + deterministicCQId bool + concurrency uint64 +} + +func NewScheduler(tables schema.Tables, client schema.ClientMeta, opts ...Option) *Scheduler { + s := Scheduler{ + tables: tables, + client: client, + metrics: &Metrics{TableClient: make(map[string]map[string]*TableClientMetrics)}, + caser: caser.New(), + concurrency: defaultConcurrency, + } + for _, opt := range opts { + opt(&s) + } + return &s +} + +func (s *Scheduler) Sync(ctx context.Context, res chan<- arrow.Record) error { + resources := make(chan *schema.Resource) + go func() { + defer close(resources) + switch s.strategy { + case SchedulerDFS: + s.syncDfs(ctx, resources) + case SchedulerRoundRobin: + s.syncRoundRobin(ctx, resources) + default: + panic(fmt.Errorf("unknown scheduler %s", s.strategy)) + } + }() + for resource := range resources { + vector := resource.GetValues() + bldr := array.NewRecordBuilder(memory.DefaultAllocator, resource.Table.ToArrowSchema()) + scalar.AppendToRecordBuilder(bldr, vector) + rec := bldr.NewRecord() + res <- rec + } + return nil +} + +// func (p *Scheduler) logTablesMetrics(tables schema.Tables, client schema.ClientMeta) { +// clientName := client.ID() +// for _, table := range tables { +// metrics := p.metrics.TableClient[table.Name][clientName] +// p.logger.Info().Str("table", table.Name).Str("client", clientName).Uint64("resources", metrics.Resources).Uint64("errors", metrics.Errors).Msg("table sync finished") +// p.logTablesMetrics(table.Relations, client) +// } +// } + +func (p *Scheduler) resolveResource(ctx context.Context, table *schema.Table, client schema.ClientMeta, parent *schema.Resource, item any) *schema.Resource { + var validationErr *schema.ValidationError + ctx, cancel := context.WithTimeout(ctx, 10*time.Minute) + defer cancel() + resource := schema.NewResourceData(table, parent, item) + objectStartTime := time.Now() + clientID := client.ID() + tableMetrics := p.metrics.TableClient[table.Name][clientID] + logger := p.logger.With().Str("table", table.Name).Str("client", clientID).Logger() + defer func() { + if err := recover(); err != nil { + stack := fmt.Sprintf("%s\n%s", err, string(debug.Stack())) + logger.Error().Interface("error", err).TimeDiff("duration", time.Now(), objectStartTime).Str("stack", stack).Msg("resource resolver finished with panic") + atomic.AddUint64(&tableMetrics.Panics, 1) + sentry.WithScope(func(scope *sentry.Scope) { + scope.SetTag("table", table.Name) + sentry.CurrentHub().CaptureMessage(stack) + }) + } + }() + if table.PreResourceResolver != nil { + if err := table.PreResourceResolver(ctx, client, resource); err != nil { + logger.Error().Err(err).Msg("pre resource resolver failed") + atomic.AddUint64(&tableMetrics.Errors, 1) + if errors.As(err, &validationErr) { + sentry.WithScope(func(scope *sentry.Scope) { + scope.SetTag("table", table.Name) + sentry.CurrentHub().CaptureMessage(validationErr.MaskedError()) + }) + } + return nil + } + } + + for _, c := range table.Columns { + p.resolveColumn(ctx, logger, tableMetrics, client, resource, c) + } + + if table.PostResourceResolver != nil { + if err := table.PostResourceResolver(ctx, client, resource); err != nil { + logger.Error().Stack().Err(err).Msg("post resource resolver finished with error") + atomic.AddUint64(&tableMetrics.Errors, 1) + if errors.As(err, &validationErr) { + sentry.WithScope(func(scope *sentry.Scope) { + scope.SetTag("table", table.Name) + sentry.CurrentHub().CaptureMessage(validationErr.MaskedError()) + }) + } + } + } + atomic.AddUint64(&tableMetrics.Resources, 1) + return resource +} + +func (p *Scheduler) resolveColumn(ctx context.Context, logger zerolog.Logger, tableMetrics *TableClientMetrics, client schema.ClientMeta, resource *schema.Resource, c schema.Column) { + var validationErr *schema.ValidationError + columnStartTime := time.Now() + defer func() { + if err := recover(); err != nil { + stack := fmt.Sprintf("%s\n%s", err, string(debug.Stack())) + logger.Error().Str("column", c.Name).Interface("error", err).TimeDiff("duration", time.Now(), columnStartTime).Str("stack", stack).Msg("column resolver finished with panic") + atomic.AddUint64(&tableMetrics.Panics, 1) + sentry.WithScope(func(scope *sentry.Scope) { + scope.SetTag("table", resource.Table.Name) + scope.SetTag("column", c.Name) + sentry.CurrentHub().CaptureMessage(stack) + }) + } + }() + + if c.Resolver != nil { + if err := c.Resolver(ctx, client, resource, c); err != nil { + logger.Error().Err(err).Msg("column resolver finished with error") + atomic.AddUint64(&tableMetrics.Errors, 1) + if errors.As(err, &validationErr) { + sentry.WithScope(func(scope *sentry.Scope) { + scope.SetTag("table", resource.Table.Name) + scope.SetTag("column", c.Name) + sentry.CurrentHub().CaptureMessage(validationErr.MaskedError()) + }) + } + } + } else { + // base use case: try to get column with CamelCase name + v := funk.Get(resource.GetItem(), p.caser.ToPascal(c.Name), funk.WithAllowZero()) + if v != nil { + err := resource.Set(c.Name, v) + if err != nil { + logger.Error().Err(err).Msg("column resolver finished with error") + atomic.AddUint64(&tableMetrics.Errors, 1) + if errors.As(err, &validationErr) { + sentry.WithScope(func(scope *sentry.Scope) { + scope.SetTag("table", resource.Table.Name) + scope.SetTag("column", c.Name) + sentry.CurrentHub().CaptureMessage(validationErr.MaskedError()) + }) + } + } + } + } +} + +// func (p *Scheduler) periodicMetricLogger(ctx context.Context, wg *sync.WaitGroup) { +// defer wg.Done() + +// ticker := time.NewTicker(periodicMetricLoggerInterval) +// defer ticker.Stop() + +// for { +// select { +// case <-ctx.Done(): +// return +// case <-ticker.C: +// p.logger.Info(). +// Uint64("total_resources", p.metrics.TotalResourcesAtomic()). +// Uint64("total_errors", p.metrics.TotalErrorsAtomic()). +// Uint64("total_panics", p.metrics.TotalPanicsAtomic()). +// Msg("Sync in progress") +// } +// } +// } + +// unparam's suggestion to remove the second parameter is not good advice here. +// nolint:unparam +func max(a, b uint64) uint64 { + if a > b { + return a + } + return b +} diff --git a/plugin/scheduler_dfs.go b/scheduler/scheduler_dfs.go similarity index 69% rename from plugin/scheduler_dfs.go rename to scheduler/scheduler_dfs.go index bd87c50aeb..7789dd34e4 100644 --- a/plugin/scheduler_dfs.go +++ b/scheduler/scheduler_dfs.go @@ -1,4 +1,4 @@ -package plugin +package scheduler import ( "context" @@ -14,27 +14,27 @@ import ( "golang.org/x/sync/semaphore" ) -func (p *Plugin) syncDfs(ctx context.Context, options SyncOptions, client ManagedSyncClient, tables schema.Tables, resolvedResources chan<- *schema.Resource) { +func (s *Scheduler) syncDfs(ctx context.Context, resolvedResources chan<- *schema.Resource) { // This is very similar to the concurrent web crawler problem with some minor changes. // We are using DFS to make sure memory usage is capped at O(h) where h is the height of the tree. - tableConcurrency := max(uint64(options.Concurrency/minResourceConcurrency), minTableConcurrency) + tableConcurrency := max(uint64(s.concurrency/minResourceConcurrency), minTableConcurrency) resourceConcurrency := tableConcurrency * minResourceConcurrency - p.tableSems = make([]*semaphore.Weighted, p.maxDepth) - for i := uint64(0); i < p.maxDepth; i++ { - p.tableSems[i] = semaphore.NewWeighted(int64(tableConcurrency)) + s.tableSems = make([]*semaphore.Weighted, s.maxDepth) + for i := uint64(0); i < s.maxDepth; i++ { + s.tableSems[i] = semaphore.NewWeighted(int64(tableConcurrency)) // reduce table concurrency logarithmically for every depth level tableConcurrency = max(tableConcurrency/2, minTableConcurrency) } - p.resourceSem = semaphore.NewWeighted(int64(resourceConcurrency)) + s.resourceSem = semaphore.NewWeighted(int64(resourceConcurrency)) // we have this because plugins can return sometimes clients in a random way which will cause // differences between this run and the next one. - preInitialisedClients := make([][]schema.ClientMeta, len(tables)) - for i, table := range tables { - clients := []schema.ClientMeta{client.(schema.ClientMeta)} + preInitialisedClients := make([][]schema.ClientMeta, len(s.tables)) + for i, table := range s.tables { + clients := []schema.ClientMeta{s.client} if table.Multiplex != nil { - clients = table.Multiplex(client.(schema.ClientMeta)) + clients = table.Multiplex(s.client) } // Detect duplicate clients while multiplexing seenClients := make(map[string]bool) @@ -46,44 +46,44 @@ func (p *Plugin) syncDfs(ctx context.Context, options SyncOptions, client Manage scope.SetTag("table", table.Name) sentry.CurrentHub().CaptureMessage("duplicate client ID in " + table.Name) }) - p.logger.Warn().Str("client", c.ID()).Str("table", table.Name).Msg("multiplex returned duplicate client") + s.logger.Warn().Str("client", c.ID()).Str("table", table.Name).Msg("multiplex returned duplicate client") } } preInitialisedClients[i] = clients // we do this here to avoid locks so we initial the metrics structure once in the main goroutines // and then we can just read from it in the other goroutines concurrently given we are not writing to it. - p.metrics.initWithClients(table, clients) + s.metrics.initWithClients(table, clients) } // We start a goroutine that logs the metrics periodically. // It needs its own waitgroup - var logWg sync.WaitGroup - logWg.Add(1) + // var logWg sync.WaitGroup + // logWg.Add(1) - logCtx, logCancel := context.WithCancel(ctx) - go p.periodicMetricLogger(logCtx, &logWg) + // logCtx, logCancel := context.WithCancel(ctx) + // go s.periodicMetricLogger(logCtx, &logWg) var wg sync.WaitGroup - for i, table := range tables { + for i, table := range s.tables { table := table clients := preInitialisedClients[i] for _, client := range clients { client := client - if err := p.tableSems[0].Acquire(ctx, 1); err != nil { + if err := s.tableSems[0].Acquire(ctx, 1); err != nil { // This means context was cancelled wg.Wait() // gracefully shut down the logger goroutine - logCancel() - logWg.Wait() + // logCancel() + // logWg.Wait() return } wg.Add(1) go func() { defer wg.Done() - defer p.tableSems[0].Release(1) + defer s.tableSems[0].Release(1) // not checking for error here as nothing much todo. // the error is logged and this happens when context is cancelled - p.resolveTableDfs(ctx, table, client, nil, resolvedResources, 1) + s.resolveTableDfs(ctx, table, client, nil, resolvedResources, 1) }() } } @@ -92,19 +92,19 @@ func (p *Plugin) syncDfs(ctx context.Context, options SyncOptions, client Manage wg.Wait() // gracefully shut down the logger goroutine - logCancel() - logWg.Wait() + // logCancel() + // logWg.Wait() } -func (p *Plugin) resolveTableDfs(ctx context.Context, table *schema.Table, client schema.ClientMeta, parent *schema.Resource, resolvedResources chan<- *schema.Resource, depth int) { +func (s *Scheduler) resolveTableDfs(ctx context.Context, table *schema.Table, client schema.ClientMeta, parent *schema.Resource, resolvedResources chan<- *schema.Resource, depth int) { var validationErr *schema.ValidationError clientName := client.ID() - logger := p.logger.With().Str("table", table.Name).Str("client", clientName).Logger() + logger := s.logger.With().Str("table", table.Name).Str("client", clientName).Logger() if parent == nil { // Log only for root tables, otherwise we spam too much. logger.Info().Msg("top level table resolver started") } - tableMetrics := p.metrics.TableClient[table.Name][clientName] + tableMetrics := s.metrics.TableClient[table.Name][clientName] res := make(chan any) go func() { @@ -134,17 +134,17 @@ func (p *Plugin) resolveTableDfs(ctx context.Context, table *schema.Table, clien }() for r := range res { - p.resolveResourcesDfs(ctx, table, client, parent, r, resolvedResources, depth) + s.resolveResourcesDfs(ctx, table, client, parent, r, resolvedResources, depth) } // we don't need any waitgroups here because we are waiting for the channel to close if parent == nil { // Log only for root tables and relations only after resolving is done, otherwise we spam per object instead of per table. logger.Info().Uint64("resources", tableMetrics.Resources).Uint64("errors", tableMetrics.Errors).Msg("table sync finished") - p.logTablesMetrics(table.Relations, client) + // s.logTablesMetrics(table.Relations, client) } } -func (p *Plugin) resolveResourcesDfs(ctx context.Context, table *schema.Table, client schema.ClientMeta, parent *schema.Resource, resources any, resolvedResources chan<- *schema.Resource, depth int) { +func (s *Scheduler) resolveResourcesDfs(ctx context.Context, table *schema.Table, client schema.ClientMeta, parent *schema.Resource, resources any, resolvedResources chan<- *schema.Resource, depth int) { resourcesSlice := helpers.InterfaceSlice(resources) if len(resourcesSlice) == 0 { return @@ -156,25 +156,25 @@ func (p *Plugin) resolveResourcesDfs(ctx context.Context, table *schema.Table, c sentValidationErrors := sync.Map{} for i := range resourcesSlice { i := i - if err := p.resourceSem.Acquire(ctx, 1); err != nil { - p.logger.Warn().Err(err).Msg("failed to acquire semaphore. context cancelled") + if err := s.resourceSem.Acquire(ctx, 1); err != nil { + s.logger.Warn().Err(err).Msg("failed to acquire semaphore. context cancelled") wg.Wait() // we have to continue emptying the channel to exit gracefully return } wg.Add(1) go func() { - defer p.resourceSem.Release(1) + defer s.resourceSem.Release(1) defer wg.Done() //nolint:all - resolvedResource := p.resolveResource(ctx, table, client, parent, resourcesSlice[i]) + resolvedResource := s.resolveResource(ctx, table, client, parent, resourcesSlice[i]) if resolvedResource == nil { return } - if err := resolvedResource.CalculateCQID(p.deterministicCQId); err != nil { - tableMetrics := p.metrics.TableClient[table.Name][client.ID()] - p.logger.Error().Err(err).Str("table", table.Name).Str("client", client.ID()).Msg("resource resolver finished with primary key calculation error") + if err := resolvedResource.CalculateCQID(s.deterministicCQId); err != nil { + tableMetrics := s.metrics.TableClient[table.Name][client.ID()] + s.logger.Error().Err(err).Str("table", table.Name).Str("client", client.ID()).Msg("resource resolver finished with primary key calculation error") if _, found := sentValidationErrors.LoadOrStore(table.Name, struct{}{}); !found { // send resource validation errors to Sentry only once per table, // to avoid sending too many duplicate messages @@ -187,8 +187,8 @@ func (p *Plugin) resolveResourcesDfs(ctx context.Context, table *schema.Table, c return } if err := resolvedResource.Validate(); err != nil { - tableMetrics := p.metrics.TableClient[table.Name][client.ID()] - p.logger.Error().Err(err).Str("table", table.Name).Str("client", client.ID()).Msg("resource resolver finished with validation error") + tableMetrics := s.metrics.TableClient[table.Name][client.ID()] + s.logger.Error().Err(err).Str("table", table.Name).Str("client", client.ID()).Msg("resource resolver finished with validation error") if _, found := sentValidationErrors.LoadOrStore(table.Name, struct{}{}); !found { // send resource validation errors to Sentry only once per table, // to avoid sending too many duplicate messages @@ -212,7 +212,7 @@ func (p *Plugin) resolveResourcesDfs(ctx context.Context, table *schema.Table, c resolvedResources <- resource for _, relation := range resource.Table.Relations { relation := relation - if err := p.tableSems[depth].Acquire(ctx, 1); err != nil { + if err := s.tableSems[depth].Acquire(ctx, 1); err != nil { // This means context was cancelled wg.Wait() return @@ -220,8 +220,8 @@ func (p *Plugin) resolveResourcesDfs(ctx context.Context, table *schema.Table, c wg.Add(1) go func() { defer wg.Done() - defer p.tableSems[depth].Release(1) - p.resolveTableDfs(ctx, relation, client, resource, resolvedResources, depth+1) + defer s.tableSems[depth].Release(1) + s.resolveTableDfs(ctx, relation, client, resource, resolvedResources, depth+1) }() } } diff --git a/plugin/scheduler_round_robin.go b/scheduler/scheduler_round_robin.go similarity index 64% rename from plugin/scheduler_round_robin.go rename to scheduler/scheduler_round_robin.go index a0be17938d..104e8f4514 100644 --- a/plugin/scheduler_round_robin.go +++ b/scheduler/scheduler_round_robin.go @@ -1,4 +1,4 @@ -package plugin +package scheduler import ( "context" @@ -13,63 +13,63 @@ type tableClient struct { client schema.ClientMeta } -func (p *Plugin) syncRoundRobin(ctx context.Context, options SyncOptions, client ManagedSyncClient, tables schema.Tables, resolvedResources chan<- *schema.Resource) { - tableConcurrency := max(uint64(options.Concurrency/minResourceConcurrency), minTableConcurrency) +func (s *Scheduler) syncRoundRobin(ctx context.Context, resolvedResources chan<- *schema.Resource) { + tableConcurrency := max(uint64(s.concurrency/minResourceConcurrency), minTableConcurrency) resourceConcurrency := tableConcurrency * minResourceConcurrency - p.tableSems = make([]*semaphore.Weighted, p.maxDepth) - for i := uint64(0); i < p.maxDepth; i++ { - p.tableSems[i] = semaphore.NewWeighted(int64(tableConcurrency)) + s.tableSems = make([]*semaphore.Weighted, s.maxDepth) + for i := uint64(0); i < s.maxDepth; i++ { + s.tableSems[i] = semaphore.NewWeighted(int64(tableConcurrency)) // reduce table concurrency logarithmically for every depth level tableConcurrency = max(tableConcurrency/2, minTableConcurrency) } - p.resourceSem = semaphore.NewWeighted(int64(resourceConcurrency)) + s.resourceSem = semaphore.NewWeighted(int64(resourceConcurrency)) // we have this because plugins can return sometimes clients in a random way which will cause // differences between this run and the next one. - preInitialisedClients := make([][]schema.ClientMeta, len(tables)) - for i, table := range tables { - clients := []schema.ClientMeta{client.(schema.ClientMeta)} + preInitialisedClients := make([][]schema.ClientMeta, len(s.tables)) + for i, table := range s.tables { + clients := []schema.ClientMeta{s.client} if table.Multiplex != nil { - clients = table.Multiplex(client.(schema.ClientMeta)) + clients = table.Multiplex(s.client) } preInitialisedClients[i] = clients // we do this here to avoid locks so we initial the metrics structure once in the main goroutines // and then we can just read from it in the other goroutines concurrently given we are not writing to it. - p.metrics.initWithClients(table, clients) + s.metrics.initWithClients(table, clients) } // We start a goroutine that logs the metrics periodically. // It needs its own waitgroup - var logWg sync.WaitGroup - logWg.Add(1) + // var logWg sync.WaitGroup + // logWg.Add(1) - logCtx, logCancel := context.WithCancel(ctx) - go p.periodicMetricLogger(logCtx, &logWg) + // logCtx, logCancel := context.WithCancel(ctx) + // go p.periodicMetricLogger(logCtx, &logWg) - tableClients := roundRobinInterleave(tables, preInitialisedClients) + tableClients := roundRobinInterleave(s.tables, preInitialisedClients) var wg sync.WaitGroup for _, tc := range tableClients { table := tc.table cl := tc.client - if err := p.tableSems[0].Acquire(ctx, 1); err != nil { + if err := s.tableSems[0].Acquire(ctx, 1); err != nil { // This means context was cancelled wg.Wait() // gracefully shut down the logger goroutine - logCancel() - logWg.Wait() + // logCancel() + // logWg.Wait() return } wg.Add(1) go func() { defer wg.Done() - defer p.tableSems[0].Release(1) + defer s.tableSems[0].Release(1) // not checking for error here as nothing much to do. // the error is logged and this happens when context is cancelled // Round Robin currently uses the DFS algorithm to resolve the tables, but this // may change in the future. - p.resolveTableDfs(ctx, table, cl, nil, resolvedResources, 1) + s.resolveTableDfs(ctx, table, cl, nil, resolvedResources, 1) }() } @@ -77,8 +77,8 @@ func (p *Plugin) syncRoundRobin(ctx context.Context, options SyncOptions, client wg.Wait() // gracefully shut down the logger goroutine - logCancel() - logWg.Wait() + // logCancel() + // logWg.Wait() } // interleave table-clients so that we get: diff --git a/plugin/scheduler_round_robin_test.go b/scheduler/scheduler_round_robin_test.go similarity index 94% rename from plugin/scheduler_round_robin_test.go rename to scheduler/scheduler_round_robin_test.go index 428b13c8a6..3b746b81bf 100644 --- a/plugin/scheduler_round_robin_test.go +++ b/scheduler/scheduler_round_robin_test.go @@ -1,4 +1,4 @@ -package plugin +package scheduler import ( "testing" @@ -6,6 +6,13 @@ import ( "github.com/cloudquery/plugin-sdk/v4/schema" ) +type testExecutionClient struct { +} + +func (t *testExecutionClient) ID() string { + return "test" +} + func TestRoundRobinInterleave(t *testing.T) { table1 := &schema.Table{Name: "test_table"} table2 := &schema.Table{Name: "test_table2"} From 93dfff6d6b5479975a8f1c501ad4b1045c6f8781 Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Mon, 5 Jun 2023 23:12:45 +0300 Subject: [PATCH 012/125] more wip --- helpers/internal_columns.go | 1 + {plugin => internal/memdb}/memdb.go | 83 ++-------- {plugin => internal/memdb}/memdb_test.go | 88 +++------- .../servers/destination/v0/destinations.go | 13 +- .../servers/destination/v1/destinations.go | 13 +- internal/servers/plugin/v3/plugin.go | 23 +-- plugin/docs.go | 6 +- plugin/metrics.go | 125 -------------- plugin/metrics_test.go | 37 ----- plugin/options.go | 73 +------- plugin/plugin.go | 156 ++++-------------- plugin/plugin_reader.go | 19 +-- plugin/plugin_test.go | 29 +--- plugin/plugin_writer.go | 39 +++-- scheduler/scheduler.go | 36 +--- scheduler/scheduler_dfs.go | 17 +- scheduler/scheduler_round_robin.go | 15 -- transformers/tables.go | 58 +++++++ plugin/managed_writer.go => writers/batch.go | 130 +++++++++++---- writers/batch_test.go | 56 +++++++ 20 files changed, 342 insertions(+), 675 deletions(-) create mode 100644 helpers/internal_columns.go rename {plugin => internal/memdb}/memdb.go (67%) rename {plugin => internal/memdb}/memdb_test.go (54%) delete mode 100644 plugin/metrics.go delete mode 100644 plugin/metrics_test.go create mode 100644 transformers/tables.go rename plugin/managed_writer.go => writers/batch.go (51%) create mode 100644 writers/batch_test.go diff --git a/helpers/internal_columns.go b/helpers/internal_columns.go new file mode 100644 index 0000000000..12668d607f --- /dev/null +++ b/helpers/internal_columns.go @@ -0,0 +1 @@ +package helpers \ No newline at end of file diff --git a/plugin/memdb.go b/internal/memdb/memdb.go similarity index 67% rename from plugin/memdb.go rename to internal/memdb/memdb.go index 8c23b430a3..13ad7f74b9 100644 --- a/plugin/memdb.go +++ b/internal/memdb/memdb.go @@ -1,4 +1,4 @@ -package plugin +package memdb import ( "context" @@ -8,6 +8,7 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/array" + "github.com/cloudquery/plugin-sdk/v4/plugin" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/rs/zerolog" ) @@ -35,7 +36,7 @@ func WithBlockingWrite() MemDBOption { } } -func GetNewClient(options ...MemDBOption) NewClientFunc { +func GetNewClient(options ...MemDBOption) plugin.NewClientFunc { c := &client{ memoryDB: make(map[string][]arrow.Record), memoryDBLock: sync.RWMutex{}, @@ -43,19 +44,19 @@ func GetNewClient(options ...MemDBOption) NewClientFunc { for _, opt := range options { opt(c) } - return func(context.Context, zerolog.Logger, any) (Client, error) { + return func(context.Context, zerolog.Logger, any) (plugin.Client, error) { return c, nil } } -func NewMemDBClient(_ context.Context, _ zerolog.Logger, spec any) (Client, error) { +func NewMemDBClient(_ context.Context, _ zerolog.Logger, spec any) (plugin.Client, error) { return &client{ memoryDB: make(map[string][]arrow.Record), tables: make(map[string]*schema.Table), }, nil } -func NewMemDBClientErrOnNew(context.Context, zerolog.Logger, []byte) (Client, error) { +func NewMemDBClientErrOnNew(context.Context, zerolog.Logger, []byte) (plugin.Client, error) { return nil, fmt.Errorf("newTestDestinationMemDBClientErrOnNew") } @@ -84,11 +85,7 @@ func (c *client) ID() string { return "testDestinationMemDB" } -func (c *client) NewManagedSyncClient(context.Context, SyncOptions) (ManagedSyncClient, error) { - return nil, fmt.Errorf("not supported") -} - -func (c *client) Sync(ctx context.Context, options SyncOptions, res chan<- arrow.Record) error { +func (c *client) Sync(ctx context.Context, options plugin.SyncOptions, res chan<- arrow.Record) error { c.memoryDBLock.RLock() for tableName := range c.memoryDB { for _, row := range c.memoryDB[tableName] { @@ -99,7 +96,15 @@ func (c *client) Sync(ctx context.Context, options SyncOptions, res chan<- arrow return nil } -func (c *client) Migrate(_ context.Context, tables schema.Tables, migrateMode MigrateMode) error { +func (c *client) Tables(ctx context.Context) (schema.Tables, error) { + tables := make(schema.Tables, 0, len(c.tables)) + for _, table := range c.tables { + tables = append(tables, table) + } + return tables, nil +} + +func (c *client) Migrate(_ context.Context, tables schema.Tables, options plugin.MigrateOptions) error { for _, table := range tables { tableName := table.Name memTable := c.memoryDB[tableName] @@ -120,32 +125,7 @@ func (c *client) Migrate(_ context.Context, tables schema.Tables, migrateMode Mi return nil } -func (c *client) Read(_ context.Context, table *schema.Table, source string, res chan<- arrow.Record) error { - tableName := table.Name - if c.memoryDB[tableName] == nil { - return nil - } - sourceColIndex := table.Columns.Index(schema.CqSourceNameColumn.Name) - if sourceColIndex == -1 { - return fmt.Errorf("table %s doesn't have source column", tableName) - } - var sortedRes []arrow.Record - c.memoryDBLock.RLock() - for _, row := range c.memoryDB[tableName] { - arr := row.Column(sourceColIndex) - if arr.(*array.String).Value(0) == source { - sortedRes = append(sortedRes, row) - } - } - c.memoryDBLock.RUnlock() - - for _, row := range sortedRes { - res <- row - } - return nil -} - -func (c *client) Write(ctx context.Context, _ schema.Tables, writeMode WriteMode, resources <-chan arrow.Record) error { +func (c *client) Write(ctx context.Context, options plugin.WriteOptions, resources <-chan arrow.Record) error { if c.errOnWrite { return fmt.Errorf("errOnWrite") } @@ -165,7 +145,7 @@ func (c *client) Write(ctx context.Context, _ schema.Tables, writeMode WriteMode return fmt.Errorf("table name not found in schema metadata") } table := c.tables[tableName] - if writeMode == WriteModeAppend { + if options.WriteMode == plugin.WriteModeAppend { c.memoryDB[tableName] = append(c.memoryDB[tableName], resource) } else { c.overwrite(table, resource) @@ -175,33 +155,6 @@ func (c *client) Write(ctx context.Context, _ schema.Tables, writeMode WriteMode return nil } -func (c *client) WriteTableBatch(ctx context.Context, table *schema.Table, writeMode WriteMode, resources []arrow.Record) error { - if c.errOnWrite { - return fmt.Errorf("errOnWrite") - } - if c.blockingWrite { - <-ctx.Done() - if c.errOnWrite { - return fmt.Errorf("errOnWrite") - } - return nil - } - tableName := table.Name - for _, resource := range resources { - c.memoryDBLock.Lock() - if writeMode == WriteModeAppend { - c.memoryDB[tableName] = append(c.memoryDB[tableName], resource) - } else { - c.overwrite(table, resource) - } - c.memoryDBLock.Unlock() - } - return nil -} - -func (*client) Metrics() Metrics { - return Metrics{} -} func (c *client) Close(context.Context) error { c.memoryDB = nil diff --git a/plugin/memdb_test.go b/internal/memdb/memdb_test.go similarity index 54% rename from plugin/memdb_test.go rename to internal/memdb/memdb_test.go index 2f9f54a506..e04a23bd1a 100644 --- a/plugin/memdb_test.go +++ b/internal/memdb/memdb_test.go @@ -1,4 +1,4 @@ -package plugin +package memdb import ( "context" @@ -7,90 +7,52 @@ import ( "github.com/apache/arrow/go/v13/arrow" pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" + "github.com/cloudquery/plugin-sdk/v4/plugin" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/google/uuid" ) -var migrateStrategyOverwrite = MigrateStrategy{ - AddColumn: MigrateModeForce, - AddColumnNotNull: MigrateModeForce, - RemoveColumn: MigrateModeForce, - RemoveColumnNotNull: MigrateModeForce, - ChangeColumn: MigrateModeForce, +var migrateStrategyOverwrite = plugin.MigrateStrategy{ + AddColumn: plugin.MigrateModeForce, + AddColumnNotNull: plugin.MigrateModeForce, + RemoveColumn: plugin.MigrateModeForce, + RemoveColumnNotNull: plugin.MigrateModeForce, + ChangeColumn: plugin.MigrateModeForce, } -var migrateStrategyAppend = MigrateStrategy{ - AddColumn: MigrateModeForce, - AddColumnNotNull: MigrateModeForce, - RemoveColumn: MigrateModeForce, - RemoveColumnNotNull: MigrateModeForce, - ChangeColumn: MigrateModeForce, +var migrateStrategyAppend = plugin.MigrateStrategy{ + AddColumn: plugin.MigrateModeForce, + AddColumnNotNull: plugin.MigrateModeForce, + RemoveColumn: plugin.MigrateModeForce, + RemoveColumnNotNull: plugin.MigrateModeForce, + ChangeColumn: plugin.MigrateModeForce, } func TestPluginUnmanagedClient(t *testing.T) { - PluginTestSuiteRunner( + plugin.PluginTestSuiteRunner( t, - func() *Plugin { - return NewPlugin("test", "development", NewMemDBClient) + func() *plugin.Plugin { + return plugin.NewPlugin("test", "development", NewMemDBClient) }, nil, - PluginTestSuiteTests{ + plugin.PluginTestSuiteTests{ MigrateStrategyOverwrite: migrateStrategyOverwrite, MigrateStrategyAppend: migrateStrategyAppend, }, ) } -func TestPluginManagedClient(t *testing.T) { - PluginTestSuiteRunner(t, - func() *Plugin { - return NewPlugin("test", "development", NewMemDBClient, WithManagedWriter()) - }, - nil, - PluginTestSuiteTests{ - MigrateStrategyOverwrite: migrateStrategyOverwrite, - MigrateStrategyAppend: migrateStrategyAppend, - }) -} - -func TestPluginManagedClientWithSmallBatchSize(t *testing.T) { - PluginTestSuiteRunner(t, - func() *Plugin { - return NewPlugin("test", "development", NewMemDBClient, WithManagedWriter(), - WithDefaultBatchSize(1), - WithDefaultBatchSizeBytes(1)) - }, nil, - PluginTestSuiteTests{ - MigrateStrategyOverwrite: migrateStrategyOverwrite, - MigrateStrategyAppend: migrateStrategyAppend, - }) -} - -func TestPluginManagedClientWithLargeBatchSize(t *testing.T) { - PluginTestSuiteRunner(t, - func() *Plugin { - return NewPlugin("test", "development", NewMemDBClient, WithManagedWriter(), - WithDefaultBatchSize(100000000), - WithDefaultBatchSizeBytes(100000000)) - }, - nil, - PluginTestSuiteTests{ - MigrateStrategyOverwrite: migrateStrategyOverwrite, - MigrateStrategyAppend: migrateStrategyAppend, - }) -} - func TestPluginManagedClientWithCQPKs(t *testing.T) { - PluginTestSuiteRunner(t, - func() *Plugin { - return NewPlugin("test", "development", NewMemDBClient) + plugin.PluginTestSuiteRunner(t, + func() *plugin.Plugin { + return plugin.NewPlugin("test", "development", NewMemDBClient) }, pbPlugin.Spec{ WriteSpec: &pbPlugin.WriteSpec{ PkMode: pbPlugin.WriteSpec_CQ_ID_ONLY, }, }, - PluginTestSuiteTests{ + plugin.PluginTestSuiteTests{ MigrateStrategyOverwrite: migrateStrategyOverwrite, MigrateStrategyAppend: migrateStrategyAppend, }) @@ -98,7 +60,7 @@ func TestPluginManagedClientWithCQPKs(t *testing.T) { func TestPluginOnNewError(t *testing.T) { ctx := context.Background() - p := NewPlugin("test", "development", NewMemDBClientErrOnNew) + p := plugin.NewPlugin("test", "development", NewMemDBClientErrOnNew) err := p.Init(ctx, nil) if err == nil { @@ -109,7 +71,7 @@ func TestPluginOnNewError(t *testing.T) { func TestOnWriteError(t *testing.T) { ctx := context.Background() newClientFunc := GetNewClient(WithErrOnWrite()) - p := NewPlugin("test", "development", newClientFunc) + p := plugin.NewPlugin("test", "development", newClientFunc) if err := p.Init(ctx, nil); err != nil { t.Fatal(err) } @@ -144,7 +106,7 @@ func TestOnWriteError(t *testing.T) { func TestOnWriteCtxCancelled(t *testing.T) { ctx := context.Background() newClientFunc := GetNewClient(WithBlockingWrite()) - p := NewPlugin("test", "development", newClientFunc) + p := plugin.NewPlugin("test", "development", newClientFunc) if err := p.Init(ctx, pbPlugin.Spec{ WriteSpec: &pbPlugin.WriteSpec{}, }); err != nil { diff --git a/internal/servers/destination/v0/destinations.go b/internal/servers/destination/v0/destinations.go index 7419cf10f2..4c22750e69 100644 --- a/internal/servers/destination/v0/destinations.go +++ b/internal/servers/destination/v0/destinations.go @@ -3,7 +3,6 @@ package destination import ( "context" "encoding/json" - "fmt" "io" "github.com/apache/arrow/go/v13/arrow" @@ -212,13 +211,13 @@ func SetDestinationManagedCqColumns(tables []*schema.Table) { } func (s *Server) GetMetrics(context.Context, *pb.GetDestinationMetrics_Request) (*pb.GetDestinationMetrics_Response, error) { - stats := s.Plugin.Metrics() - b, err := json.Marshal(stats) - if err != nil { - return nil, fmt.Errorf("failed to marshal stats: %w", err) - } + // stats := s.Plugin.Metrics() + // b, err := json.Marshal(stats) + // if err != nil { + // return nil, fmt.Errorf("failed to marshal stats: %w", err) + // } return &pb.GetDestinationMetrics_Response{ - Metrics: b, + // Metrics: b, }, nil } diff --git a/internal/servers/destination/v1/destinations.go b/internal/servers/destination/v1/destinations.go index d53e5e2ee9..45cfa4f7f7 100644 --- a/internal/servers/destination/v1/destinations.go +++ b/internal/servers/destination/v1/destinations.go @@ -4,7 +4,6 @@ import ( "bytes" "context" "encoding/json" - "fmt" "io" "github.com/apache/arrow/go/v13/arrow" @@ -171,13 +170,13 @@ func setCQIDAsPrimaryKeysForTables(tables schema.Tables) { } func (s *Server) GetMetrics(context.Context, *pb.GetDestinationMetrics_Request) (*pb.GetDestinationMetrics_Response, error) { - stats := s.Plugin.Metrics() - b, err := json.Marshal(stats) - if err != nil { - return nil, fmt.Errorf("failed to marshal stats: %w", err) - } + // stats := s.Plugin.Metrics() + // b, err := json.Marshal(stats) + // if err != nil { + // return nil, fmt.Errorf("failed to marshal stats: %w", err) + // } return &pb.GetDestinationMetrics_Response{ - Metrics: b, + // Metrics: b, }, nil } diff --git a/internal/servers/plugin/v3/plugin.go b/internal/servers/plugin/v3/plugin.go index fa432af917..00e2543d99 100644 --- a/internal/servers/plugin/v3/plugin.go +++ b/internal/servers/plugin/v3/plugin.go @@ -33,30 +33,17 @@ type Server struct { NoSentry bool } -func (s *Server) GetStaticTables(context.Context, *pb.GetStaticTables_Request) (*pb.GetStaticTables_Response, error) { - tables := s.Plugin.StaticTables().ToArrowSchemas() +func (s *Server) GetTables(context.Context, *pb.GetTables_Request) (*pb.GetTables_Response, error) { + tables := s.Plugin.Tables().ToArrowSchemas() encoded, err := tables.Encode() if err != nil { return nil, fmt.Errorf("failed to encode tables: %w", err) } - return &pb.GetStaticTables_Response{ + return &pb.GetTables_Response{ Tables: encoded, }, nil } -func (s *Server) GetDynamicTables(context.Context, *pb.GetDynamicTables_Request) (*pb.GetDynamicTables_Response, error) { - tables := s.Plugin.DynamicTables() - if tables == nil { - return &pb.GetDynamicTables_Response{}, nil - } - encoded, err := tables.ToArrowSchemas().Encode() - if err != nil { - return nil, fmt.Errorf("failed to encode tables: %w", err) - } - return &pb.GetDynamicTables_Response{ - Tables: encoded, - }, nil -} func (s *Server) GetName(context.Context, *pb.GetName_Request) (*pb.GetName_Response, error) { return &pb.GetName_Response{ @@ -86,10 +73,6 @@ func (s *Server) Sync(req *pb.Sync_Request, stream pb.Plugin_SyncServer) error { Tables: req.Tables, SkipTables: req.SkipTables, Concurrency: req.Concurrency, - Scheduler: plugin.SchedulerDFS, - } - if req.Scheduler == pb.SCHEDULER_SCHEDULER_ROUND_ROBIN { - syncOptions.Scheduler = plugin.SchedulerRoundRobin } // sourceName := req.SourceName diff --git a/plugin/docs.go b/plugin/docs.go index b100ea649c..6e4dccf581 100644 --- a/plugin/docs.go +++ b/plugin/docs.go @@ -2,6 +2,7 @@ package plugin import ( "bytes" + "context" "embed" "encoding/json" "fmt" @@ -83,7 +84,10 @@ func (p *Plugin) GeneratePluginDocs(dir string, format pbPlugin.GenDocs_FORMAT) if err := os.MkdirAll(dir, os.ModePerm); err != nil { return err } - tables := p.staticTables + tables, err := p.Tables(context.Background()) + if err != nil { + return err + } setDestinationManagedCqColumns(tables) sortedTables := make(schema.Tables, 0, len(tables)) diff --git a/plugin/metrics.go b/plugin/metrics.go deleted file mode 100644 index 8ba88823b9..0000000000 --- a/plugin/metrics.go +++ /dev/null @@ -1,125 +0,0 @@ -package plugin - -import ( - "sync/atomic" - "time" - - "github.com/cloudquery/plugin-sdk/v4/schema" -) - -type Metrics struct { - TableClient map[string]map[string]*TableClientMetrics -} - -type TableClientMetrics struct { - Resources uint64 - Errors uint64 - Panics uint64 - StartTime time.Time - EndTime time.Time -} - -func (s *TableClientMetrics) Equal(other *TableClientMetrics) bool { - return s.Resources == other.Resources && s.Errors == other.Errors && s.Panics == other.Panics -} - -// Equal compares to stats. Mostly useful in testing -func (s *Metrics) Equal(other *Metrics) bool { - for table, clientStats := range s.TableClient { - for client, stats := range clientStats { - if _, ok := other.TableClient[table]; !ok { - return false - } - if _, ok := other.TableClient[table][client]; !ok { - return false - } - if !stats.Equal(other.TableClient[table][client]) { - return false - } - } - } - for table, clientStats := range other.TableClient { - for client, stats := range clientStats { - if _, ok := s.TableClient[table]; !ok { - return false - } - if _, ok := s.TableClient[table][client]; !ok { - return false - } - if !stats.Equal(s.TableClient[table][client]) { - return false - } - } - } - return true -} - -func (s *Metrics) initWithClients(table *schema.Table, clients []schema.ClientMeta) { - s.TableClient[table.Name] = make(map[string]*TableClientMetrics, len(clients)) - for _, client := range clients { - s.TableClient[table.Name][client.ID()] = &TableClientMetrics{} - } - for _, relation := range table.Relations { - s.initWithClients(relation, clients) - } -} - -func (s *Metrics) TotalErrors() uint64 { - var total uint64 - for _, clientMetrics := range s.TableClient { - for _, metrics := range clientMetrics { - total += metrics.Errors - } - } - return total -} - -func (s *Metrics) TotalErrorsAtomic() uint64 { - var total uint64 - for _, clientMetrics := range s.TableClient { - for _, metrics := range clientMetrics { - total += atomic.LoadUint64(&metrics.Errors) - } - } - return total -} - -func (s *Metrics) TotalPanics() uint64 { - var total uint64 - for _, clientMetrics := range s.TableClient { - for _, metrics := range clientMetrics { - total += metrics.Panics - } - } - return total -} - -func (s *Metrics) TotalPanicsAtomic() uint64 { - var total uint64 - for _, clientMetrics := range s.TableClient { - for _, metrics := range clientMetrics { - total += atomic.LoadUint64(&metrics.Panics) - } - } - return total -} - -func (s *Metrics) TotalResources() uint64 { - var total uint64 - for _, clientMetrics := range s.TableClient { - for _, metrics := range clientMetrics { - total += metrics.Resources - } - } - return total -} - -func (s *Metrics) TotalResourcesAtomic() uint64 { - var total uint64 - for _, clientMetrics := range s.TableClient { - for _, metrics := range clientMetrics { - total += atomic.LoadUint64(&metrics.Resources) - } - } - return total -} diff --git a/plugin/metrics_test.go b/plugin/metrics_test.go deleted file mode 100644 index a566edee5d..0000000000 --- a/plugin/metrics_test.go +++ /dev/null @@ -1,37 +0,0 @@ -package plugin - -import "testing" - -func TestMetrics(t *testing.T) { - s := &Metrics{ - TableClient: make(map[string]map[string]*TableClientMetrics), - } - s.TableClient["test_table"] = make(map[string]*TableClientMetrics) - s.TableClient["test_table"]["testExecutionClient"] = &TableClientMetrics{ - Resources: 1, - Errors: 2, - Panics: 3, - } - if s.TotalResources() != 1 { - t.Fatal("expected 1 resource") - } - if s.TotalErrors() != 2 { - t.Fatal("expected 2 error") - } - if s.TotalPanics() != 3 { - t.Fatal("expected 3 panics") - } - - other := &Metrics{ - TableClient: make(map[string]map[string]*TableClientMetrics), - } - other.TableClient["test_table"] = make(map[string]*TableClientMetrics) - other.TableClient["test_table"]["testExecutionClient"] = &TableClientMetrics{ - Resources: 1, - Errors: 2, - Panics: 3, - } - if !s.Equal(other) { - t.Fatal("expected metrics to be equal") - } -} diff --git a/plugin/options.go b/plugin/options.go index 66a13b69f7..aaa8687a51 100644 --- a/plugin/options.go +++ b/plugin/options.go @@ -1,10 +1,7 @@ package plugin import ( - "bytes" - "context" "fmt" - "time" "github.com/cloudquery/plugin-sdk/v4/schema" ) @@ -65,46 +62,8 @@ func (m WriteMode) String() string { return writeModeStrings[m] } -type Scheduler int - -const ( - SchedulerDFS Scheduler = iota - SchedulerRoundRobin -) - -var AllSchedulers = Schedulers{SchedulerDFS, SchedulerRoundRobin} -var AllSchedulerNames = [...]string{ - SchedulerDFS: "dfs", - SchedulerRoundRobin: "round-robin", -} - -type Schedulers []Scheduler - -func (s Schedulers) String() string { - var buffer bytes.Buffer - for i, scheduler := range s { - if i > 0 { - buffer.WriteString(", ") - } - buffer.WriteString(scheduler.String()) - } - return buffer.String() -} - -func (s Scheduler) String() string { - return AllSchedulerNames[s] -} - -type GetTables func(ctx context.Context, c Client) (schema.Tables, error) - type Option func(*Plugin) -// WithDynamicTable allows the plugin to return list of tables after call to New -func WithDynamicTable(getDynamicTables GetTables) Option { - return func(p *Plugin) { - p.getDynamicTables = getDynamicTables - } -} // WithNoInternalColumns won't add internal columns (_cq_id, _cq_parent_cq_id) to the plugin tables func WithNoInternalColumns() Option { @@ -119,34 +78,4 @@ func WithTitleTransformer(t func(*schema.Table) string) Option { return func(p *Plugin) { p.titleTransformer = t } -} - -func WithStaticTables(tables schema.Tables) Option { - return func(p *Plugin) { - p.staticTables = tables - } -} - -func WithManagedWriter() Option { - return func(p *Plugin) { - p.managedWriter = true - } -} - -func WithBatchTimeout(seconds int) Option { - return func(p *Plugin) { - p.batchTimeout = time.Duration(seconds) * time.Second - } -} - -func WithDefaultBatchSize(defaultBatchSize int) Option { - return func(p *Plugin) { - p.defaultBatchSize = defaultBatchSize - } -} - -func WithDefaultBatchSizeBytes(defaultBatchSizeBytes int) Option { - return func(p *Plugin) { - p.defaultBatchSizeBytes = defaultBatchSizeBytes - } -} +} \ No newline at end of file diff --git a/plugin/plugin.go b/plugin/plugin.go index 44a8731fec..452d650196 100644 --- a/plugin/plugin.go +++ b/plugin/plugin.go @@ -13,23 +13,13 @@ import ( "golang.org/x/sync/semaphore" ) -const ( - defaultBatchTimeoutSeconds = 20 - defaultBatchSize = 10000 - defaultBatchSizeBytes = 5 * 1024 * 1024 // 5 MiB -) - type NewClientFunc func(context.Context, zerolog.Logger, any) (Client, error) -type ManagedSyncClient interface { - ID() string -} - type Client interface { + Tables(ctx context.Context) (schema.Tables, error) Sync(ctx context.Context, options SyncOptions, res chan<- arrow.Record) error - Migrate(ctx context.Context, tables schema.Tables, migrateMode MigrateMode) error - WriteTableBatch(ctx context.Context, table *schema.Table, writeMode WriteMode, data []arrow.Record) error - Write(ctx context.Context, tables schema.Tables, writeMode WriteMode, res <-chan arrow.Record) error + Migrate(ctx context.Context, tables schema.Tables, migrateMode MigrateOptions) error + Write(ctx context.Context, options WriteOptions, res <-chan arrow.Record) error DeleteStale(ctx context.Context, tables schema.Tables, sourceName string, syncTime time.Time) error Close(ctx context.Context) error } @@ -44,10 +34,6 @@ func (UnimplementedWriter) Write(ctx context.Context, tables schema.Tables, writ return fmt.Errorf("not implemented") } -func (UnimplementedWriter) WriteTableBatch(ctx context.Context, table *schema.Table, writeMode WriteMode, data []arrow.Record) error { - return fmt.Errorf("not implemented") -} - func (UnimplementedWriter) DeleteStale(ctx context.Context, tables schema.Tables, sourceName string, syncTime time.Time) error { return fmt.Errorf("not implemented") } @@ -58,10 +44,8 @@ func (UnimplementedSync) Sync(ctx context.Context, options SyncOptions, res chan return fmt.Errorf("not implemented") } -type UnimplementedRead struct{} - -func (UnimplementedRead) Read(ctx context.Context, table *schema.Table, sourceName string, res chan<- arrow.Record) error { - return fmt.Errorf("not implemented") +func (UnimplementedSync) Tables(ctx context.Context) (schema.Tables, error) { + return nil, fmt.Errorf("not implemented") } // Plugin is the base structure required to pass to sdk.serve @@ -73,12 +57,6 @@ type Plugin struct { version string // Called upon init call to validate and init configuration newClient NewClientFunc - // dynamic table function if specified - getDynamicTables GetTables - // Tables are static tables that defined in compile time by the plugin - staticTables schema.Tables - // status sync metrics - metrics *Metrics // Logger to call, this logger is passed to the serve.Serve Client, if not defined Serve will create one instead. logger zerolog.Logger // resourceSem is a semaphore that limits the number of concurrent resources being fetched @@ -91,87 +69,24 @@ type Plugin struct { caser *caser.Caser // mu is a mutex that limits the number of concurrent init/syncs (can only be one at a time) mu sync.Mutex - // client is the initialized session client client Client - // sessionTables are the - sessionTables schema.Tables // spec is the spec the client was initialized with spec any // NoInternalColumns if set to true will not add internal columns to tables such as _cq_id and _cq_parent_id // useful for sources such as PostgreSQL and other databases internalColumns bool - // unmanagedSync if set to true then the plugin will call Sync directly and not use the scheduler - unmanagedSync bool // titleTransformer allows the plugin to control how table names get turned into titles for generated documentation titleTransformer func(*schema.Table) string syncTime time.Time sourceName string deterministicCQId bool - - managedWriter bool - workers map[string]*worker - workersLock *sync.Mutex - - batchTimeout time.Duration - defaultBatchSize int - defaultBatchSizeBytes int } const ( maxAllowedDepth = 4 ) -// Add internal columns -func (p *Plugin) addInternalColumns(tables []*schema.Table) error { - for _, table := range tables { - if c := table.Column("_cq_id"); c != nil { - return fmt.Errorf("table %s already has column _cq_id", table.Name) - } - cqID := schema.CqIDColumn - if len(table.PrimaryKeys()) == 0 { - cqID.PrimaryKey = true - } - cqSourceName := schema.CqSourceNameColumn - cqSyncTime := schema.CqSyncTimeColumn - cqSourceName.Resolver = func(_ context.Context, _ schema.ClientMeta, resource *schema.Resource, c schema.Column) error { - return resource.Set(c.Name, p.sourceName) - } - cqSyncTime.Resolver = func(_ context.Context, _ schema.ClientMeta, resource *schema.Resource, c schema.Column) error { - return resource.Set(c.Name, p.syncTime) - } - - table.Columns = append([]schema.Column{cqSourceName, cqSyncTime, cqID, schema.CqParentIDColumn}, table.Columns...) - if err := p.addInternalColumns(table.Relations); err != nil { - return err - } - } - return nil -} - -// Set parent links on relational tables -func setParents(tables schema.Tables, parent *schema.Table) { - for _, table := range tables { - table.Parent = parent - setParents(table.Relations, table) - } -} - -// Apply transformations to tables -func transformTables(tables schema.Tables) error { - for _, table := range tables { - if table.Transform != nil { - if err := table.Transform(table); err != nil { - return fmt.Errorf("failed to transform table %s: %w", table.Name, err) - } - } - if err := transformTables(table.Relations); err != nil { - return err - } - } - return nil -} - func maxDepth(tables schema.Tables) uint64 { var depth uint64 if len(tables) == 0 { @@ -190,41 +105,16 @@ func maxDepth(tables schema.Tables) uint64 { // Depending on the options, it can be write only plugin, read only plugin or both. func NewPlugin(name string, version string, newClient NewClientFunc, options ...Option) *Plugin { p := Plugin{ - name: name, - version: version, - internalColumns: true, - caser: caser.New(), - titleTransformer: DefaultTitleTransformer, - newClient: newClient, - metrics: &Metrics{TableClient: make(map[string]map[string]*TableClientMetrics)}, - workers: make(map[string]*worker), - workersLock: &sync.Mutex{}, - batchTimeout: time.Duration(defaultBatchTimeoutSeconds) * time.Second, - defaultBatchSize: defaultBatchSize, - defaultBatchSizeBytes: defaultBatchSizeBytes, + name: name, + version: version, + internalColumns: true, + caser: caser.New(), + titleTransformer: DefaultTitleTransformer, + newClient: newClient, } for _, opt := range options { opt(&p) } - if p.staticTables != nil { - setParents(p.staticTables, nil) - if err := transformTables(p.staticTables); err != nil { - panic(err) - } - if p.internalColumns { - if err := p.addInternalColumns(p.staticTables); err != nil { - panic(err) - } - } - p.maxDepth = maxDepth(p.staticTables) - if p.maxDepth > maxAllowedDepth { - panic(fmt.Errorf("max depth of tables is %d, max allowed is %d", p.maxDepth, maxAllowedDepth)) - } - if err := p.validate(p.staticTables); err != nil { - panic(err) - } - } - return &p } @@ -242,8 +132,28 @@ func (p *Plugin) SetLogger(logger zerolog.Logger) { p.logger = logger.With().Str("module", p.name+"-src").Logger() } -func (p *Plugin) Metrics() *Metrics { - return p.metrics +func (p *Plugin) Tables(ctx context.Context) (schema.Tables, error) { + tables, err := p.client.Tables(ctx) + if err != nil { + return nil, fmt.Errorf("failed to get tables: %w", err) + } + setParents(tables, nil) + if err := transformTables(tables); err != nil { + return nil, err + } + if p.internalColumns { + if err := p.addInternalColumns(tables); err != nil { + return nil, err + } + } + p.maxDepth = maxDepth(tables) + if p.maxDepth > maxAllowedDepth { + return nil, fmt.Errorf("max depth of tables is %d, max allowed is %d", p.maxDepth, maxAllowedDepth) + } + if err := p.validate(tables); err != nil { + return nil, err + } + return tables, nil } // Init initializes the plugin with the given spec. diff --git a/plugin/plugin_reader.go b/plugin/plugin_reader.go index e040976da0..da76a3ed97 100644 --- a/plugin/plugin_reader.go +++ b/plugin/plugin_reader.go @@ -6,7 +6,6 @@ import ( "time" "github.com/apache/arrow/go/v13/arrow" - "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/cloudquery/plugin-sdk/v4/state" "github.com/rs/zerolog" ) @@ -15,7 +14,6 @@ type SyncOptions struct { Tables []string SkipTables []string Concurrency int64 - Scheduler Scheduler DeterministicCQID bool // SyncTime if specified then this will be add to every table as _sync_time column SyncTime time.Time @@ -26,7 +24,6 @@ type SyncOptions struct { type ReadOnlyClient interface { Sync(ctx context.Context, options SyncOptions, res chan<- arrow.Record) error - Read(ctx context.Context, table *schema.Table, sourceName string, res chan<- arrow.Record) error Close(ctx context.Context) error } @@ -51,18 +48,6 @@ func NewReadOnlyPlugin(name string, version string, newClient NewReadOnlyClientF return NewPlugin(name, version, newClientWrapper, options...) } -// Tables returns all tables supported by this source plugin -func (p *Plugin) StaticTables() schema.Tables { - return p.staticTables -} - -func (p *Plugin) HasDynamicTables() bool { - return p.getDynamicTables != nil -} - -func (p *Plugin) DynamicTables() schema.Tables { - return p.sessionTables -} func (p *Plugin) syncAll(ctx context.Context, options SyncOptions) ([]arrow.Record, error) { var err error @@ -86,12 +71,12 @@ func (p *Plugin) Sync(ctx context.Context, options SyncOptions, res chan<- arrow } defer p.mu.Unlock() p.syncTime = options.SyncTime - startTime := time.Now() + // startTime := time.Now() if err := p.client.Sync(ctx, options, res); err != nil { return fmt.Errorf("failed to sync unmanaged client: %w", err) } - p.logger.Info().Uint64("resources", p.metrics.TotalResources()).Uint64("errors", p.metrics.TotalErrors()).Uint64("panics", p.metrics.TotalPanics()).TimeDiff("duration", time.Now(), startTime).Msg("sync finished") + // p.logger.Info().Uint64("resources", p.metrics.TotalResources()).Uint64("errors", p.metrics.TotalErrors()).Uint64("panics", p.metrics.TotalPanics()).TimeDiff("duration", time.Now(), startTime).Msg("sync finished") return nil } diff --git a/plugin/plugin_test.go b/plugin/plugin_test.go index 6fe3d0aa7e..d0016a113b 100644 --- a/plugin/plugin_test.go +++ b/plugin/plugin_test.go @@ -11,7 +11,7 @@ import ( func TestPluginUnmanagedSync(t *testing.T) { ctx := context.Background() - p := NewPlugin("test", "v0.0.0", NewMemDBClient, WithUnmanagedSync()) + p := NewPlugin("test", "v0.0.0", NewMemDBClient) testTable := schema.TestTable("test_table", schema.TestSourceOptions{}) syncTime := time.Now().UTC() sourceName := "test" @@ -74,30 +74,3 @@ func TestPluginUnmanagedSync(t *testing.T) { t.Fatal(err) } } - -// func TestPluginInit(t *testing.T) { -// const ( -// batchSize = uint64(100) -// batchSizeBytes = uint64(1000) -// ) - -// var ( -// batchSizeObserved uint64 -// batchSizeBytesObserved uint64 -// ) -// p := NewPlugin( -// "test", -// "development", -// func(ctx context.Context, logger zerolog.Logger, s any) (Client, error) { -// batchSizeObserved = s.WriteSpec.BatchSize -// batchSizeBytesObserved = s.WriteSpec.BatchSizeBytes -// return NewMemDBClient(ctx, logger, s) -// }, -// WithDefaultBatchSize(int(batchSize)), -// WithDefaultBatchSizeBytes(int(batchSizeBytes)), -// ) -// require.NoError(t, p.Init(context.TODO(), nil)) - -// require.Equal(t, batchSize, batchSizeObserved) -// require.Equal(t, batchSizeBytes, batchSizeBytesObserved) -// } diff --git a/plugin/plugin_writer.go b/plugin/plugin_writer.go index af37b8df48..d0420182be 100644 --- a/plugin/plugin_writer.go +++ b/plugin/plugin_writer.go @@ -9,21 +9,33 @@ import ( "github.com/cloudquery/plugin-sdk/v4/schema" ) -func (p *Plugin) Migrate(ctx context.Context, tables schema.Tables, migrateMode MigrateMode) error { +type WriteOptions struct { + // WriteMode is the mode to write to the database + WriteMode WriteMode + // Predefined tables are available if tables are known at the start of the write + Tables schema.Tables +} + +type MigrateOptions struct { + // MigrateMode is the mode to migrate the database + MigrateMode MigrateMode +} + +func (p *Plugin) Migrate(ctx context.Context, tables schema.Tables, options MigrateOptions) error { if p.client == nil { return fmt.Errorf("plugin is not initialized") } - return p.client.Migrate(ctx, tables, migrateMode) + return p.client.Migrate(ctx, tables, options) } // this function is currently used mostly for testing so it's not a public api -func (p *Plugin) writeOne(ctx context.Context, sourceName string, syncTime time.Time, writeMode WriteMode, resource arrow.Record) error { +func (p *Plugin) writeOne(ctx context.Context, options WriteOptions, resource arrow.Record) error { resources := []arrow.Record{resource} - return p.writeAll(ctx, sourceName, syncTime, writeMode, resources) + return p.writeAll(ctx, options, resources) } // this function is currently used mostly for testing so it's not a public api -func (p *Plugin) writeAll(ctx context.Context, sourceName string, syncTime time.Time, writeMode WriteMode, resources []arrow.Record) error { +func (p *Plugin) writeAll(ctx context.Context, options WriteOptions, resources []arrow.Record) error { ch := make(chan arrow.Record, len(resources)) for _, resource := range resources { ch <- resource @@ -48,21 +60,14 @@ func (p *Plugin) writeAll(ctx context.Context, sourceName string, syncTime time. tables = append(tables, table) tableNames[table.Name] = struct{}{} } - return p.Write(ctx, sourceName, tables, syncTime, writeMode, ch) + options.Tables = tables + return p.Write(ctx, options, ch) } -func (p *Plugin) Write(ctx context.Context, sourceName string, tables schema.Tables, syncTime time.Time, writeMode WriteMode, res <-chan arrow.Record) error { - syncTime = syncTime.UTC() - if p.managedWriter { - if err := p.writeManagedTableBatch(ctx, tables, writeMode, res); err != nil { - return err - } - } else { - if err := p.client.Write(ctx, tables, writeMode, res); err != nil { - return err - } +func (p *Plugin) Write(ctx context.Context, options WriteOptions, res <-chan arrow.Record) error { + if err := p.client.Write(ctx, options, res); err != nil { + return err } - return nil } diff --git a/scheduler/scheduler.go b/scheduler/scheduler.go index 228582ef7a..d9c2654634 100644 --- a/scheduler/scheduler.go +++ b/scheduler/scheduler.go @@ -134,14 +134,14 @@ func (s *Scheduler) Sync(ctx context.Context, res chan<- arrow.Record) error { return nil } -// func (p *Scheduler) logTablesMetrics(tables schema.Tables, client schema.ClientMeta) { -// clientName := client.ID() -// for _, table := range tables { -// metrics := p.metrics.TableClient[table.Name][clientName] -// p.logger.Info().Str("table", table.Name).Str("client", clientName).Uint64("resources", metrics.Resources).Uint64("errors", metrics.Errors).Msg("table sync finished") -// p.logTablesMetrics(table.Relations, client) -// } -// } +func (p *Scheduler) logTablesMetrics(tables schema.Tables, client schema.ClientMeta) { + clientName := client.ID() + for _, table := range tables { + metrics := p.metrics.TableClient[table.Name][clientName] + p.logger.Info().Str("table", table.Name).Str("client", clientName).Uint64("resources", metrics.Resources).Uint64("errors", metrics.Errors).Msg("table sync finished") + p.logTablesMetrics(table.Relations, client) + } +} func (p *Scheduler) resolveResource(ctx context.Context, table *schema.Table, client schema.ClientMeta, parent *schema.Resource, item any) *schema.Resource { var validationErr *schema.ValidationError @@ -245,26 +245,6 @@ func (p *Scheduler) resolveColumn(ctx context.Context, logger zerolog.Logger, ta } } -// func (p *Scheduler) periodicMetricLogger(ctx context.Context, wg *sync.WaitGroup) { -// defer wg.Done() - -// ticker := time.NewTicker(periodicMetricLoggerInterval) -// defer ticker.Stop() - -// for { -// select { -// case <-ctx.Done(): -// return -// case <-ticker.C: -// p.logger.Info(). -// Uint64("total_resources", p.metrics.TotalResourcesAtomic()). -// Uint64("total_errors", p.metrics.TotalErrorsAtomic()). -// Uint64("total_panics", p.metrics.TotalPanicsAtomic()). -// Msg("Sync in progress") -// } -// } -// } - // unparam's suggestion to remove the second parameter is not good advice here. // nolint:unparam func max(a, b uint64) uint64 { diff --git a/scheduler/scheduler_dfs.go b/scheduler/scheduler_dfs.go index 7789dd34e4..f0d465684f 100644 --- a/scheduler/scheduler_dfs.go +++ b/scheduler/scheduler_dfs.go @@ -55,14 +55,6 @@ func (s *Scheduler) syncDfs(ctx context.Context, resolvedResources chan<- *schem s.metrics.initWithClients(table, clients) } - // We start a goroutine that logs the metrics periodically. - // It needs its own waitgroup - // var logWg sync.WaitGroup - // logWg.Add(1) - - // logCtx, logCancel := context.WithCancel(ctx) - // go s.periodicMetricLogger(logCtx, &logWg) - var wg sync.WaitGroup for i, table := range s.tables { table := table @@ -72,9 +64,6 @@ func (s *Scheduler) syncDfs(ctx context.Context, resolvedResources chan<- *schem if err := s.tableSems[0].Acquire(ctx, 1); err != nil { // This means context was cancelled wg.Wait() - // gracefully shut down the logger goroutine - // logCancel() - // logWg.Wait() return } wg.Add(1) @@ -90,10 +79,6 @@ func (s *Scheduler) syncDfs(ctx context.Context, resolvedResources chan<- *schem // Wait for all the worker goroutines to finish wg.Wait() - - // gracefully shut down the logger goroutine - // logCancel() - // logWg.Wait() } func (s *Scheduler) resolveTableDfs(ctx context.Context, table *schema.Table, client schema.ClientMeta, parent *schema.Resource, resolvedResources chan<- *schema.Resource, depth int) { @@ -140,7 +125,7 @@ func (s *Scheduler) resolveTableDfs(ctx context.Context, table *schema.Table, cl // we don't need any waitgroups here because we are waiting for the channel to close if parent == nil { // Log only for root tables and relations only after resolving is done, otherwise we spam per object instead of per table. logger.Info().Uint64("resources", tableMetrics.Resources).Uint64("errors", tableMetrics.Errors).Msg("table sync finished") - // s.logTablesMetrics(table.Relations, client) + s.logTablesMetrics(table.Relations, client) } } diff --git a/scheduler/scheduler_round_robin.go b/scheduler/scheduler_round_robin.go index 104e8f4514..43bd337862 100644 --- a/scheduler/scheduler_round_robin.go +++ b/scheduler/scheduler_round_robin.go @@ -39,14 +39,6 @@ func (s *Scheduler) syncRoundRobin(ctx context.Context, resolvedResources chan<- s.metrics.initWithClients(table, clients) } - // We start a goroutine that logs the metrics periodically. - // It needs its own waitgroup - // var logWg sync.WaitGroup - // logWg.Add(1) - - // logCtx, logCancel := context.WithCancel(ctx) - // go p.periodicMetricLogger(logCtx, &logWg) - tableClients := roundRobinInterleave(s.tables, preInitialisedClients) var wg sync.WaitGroup @@ -56,9 +48,6 @@ func (s *Scheduler) syncRoundRobin(ctx context.Context, resolvedResources chan<- if err := s.tableSems[0].Acquire(ctx, 1); err != nil { // This means context was cancelled wg.Wait() - // gracefully shut down the logger goroutine - // logCancel() - // logWg.Wait() return } wg.Add(1) @@ -75,10 +64,6 @@ func (s *Scheduler) syncRoundRobin(ctx context.Context, resolvedResources chan<- // Wait for all the worker goroutines to finish wg.Wait() - - // gracefully shut down the logger goroutine - // logCancel() - // logWg.Wait() } // interleave table-clients so that we get: diff --git a/transformers/tables.go b/transformers/tables.go new file mode 100644 index 0000000000..94532c6ca3 --- /dev/null +++ b/transformers/tables.go @@ -0,0 +1,58 @@ +package transformers + +import ( + "context" + "fmt" + + "github.com/cloudquery/plugin-sdk/v4/schema" +) + +// Set parent links on relational tables +func setParents(tables schema.Tables, parent *schema.Table) { + for _, table := range tables { + table.Parent = parent + setParents(table.Relations, table) + } +} + +// Add internal columns +func AddInternalColumns(tables []*schema.Table) error { + for _, table := range tables { + if c := table.Column("_cq_id"); c != nil { + return fmt.Errorf("table %s already has column _cq_id", table.Name) + } + cqID := schema.CqIDColumn + if len(table.PrimaryKeys()) == 0 { + cqID.PrimaryKey = true + } + cqSourceName := schema.CqSourceNameColumn + cqSyncTime := schema.CqSyncTimeColumn + cqSourceName.Resolver = func(_ context.Context, _ schema.ClientMeta, resource *schema.Resource, c schema.Column) error { + return resource.Set(c.Name, p.sourceName) + } + cqSyncTime.Resolver = func(_ context.Context, _ schema.ClientMeta, resource *schema.Resource, c schema.Column) error { + return resource.Set(c.Name, p.syncTime) + } + + table.Columns = append([]schema.Column{cqSourceName, cqSyncTime, cqID, schema.CqParentIDColumn}, table.Columns...) + if err := AddInternalColumns(table.Relations); err != nil { + return err + } + } + return nil +} + +// Apply transformations to tables +func TransformTables(tables schema.Tables) error { + for _, table := range tables { + if table.Transform != nil { + if err := table.Transform(table); err != nil { + return fmt.Errorf("failed to transform table %s: %w", table.Name, err) + } + } + if err := TransformTables(table.Relations); err != nil { + return err + } + } + return nil +} \ No newline at end of file diff --git a/plugin/managed_writer.go b/writers/batch.go similarity index 51% rename from plugin/managed_writer.go rename to writers/batch.go index 6af7fd0004..186643aaf0 100644 --- a/plugin/managed_writer.go +++ b/writers/batch.go @@ -1,4 +1,4 @@ -package plugin +package writers import ( "context" @@ -10,8 +10,57 @@ import ( "github.com/apache/arrow/go/v13/arrow/util" "github.com/cloudquery/plugin-sdk/v4/internal/pk" "github.com/cloudquery/plugin-sdk/v4/schema" + "github.com/rs/zerolog" ) +const ( + defaultBatchTimeoutSeconds = 20 + defaultBatchSize = 10000 + defaultBatchSizeBytes = 5 * 1024 * 1024 // 5 MiB +) + +type BatchWriterClient interface { + WriteTableBatch(ctx context.Context, table *schema.Table, resources []arrow.Record) error +} + +type BatchWriter struct { + tables schema.Tables + client BatchWriterClient + workers map[string]*worker + workersLock *sync.Mutex + + logger zerolog.Logger + batchTimeout time.Duration + batchSize int + batchSizeBytes int +} + +type Option func(*BatchWriter) + +func WithLogger(logger zerolog.Logger) Option { + return func(p *BatchWriter) { + p.logger = logger + } +} + +func WithBatchTimeout(timeout time.Duration) Option { + return func(p *BatchWriter) { + p.batchTimeout = timeout + } +} + +func WithBatchSize(size int) Option { + return func(p *BatchWriter) { + p.batchSize = size + } +} + +func WithBatchSizeBytes(size int) Option { + return func(p *BatchWriter) { + p.batchSizeBytes = size + } +} + type worker struct { count int wg *sync.WaitGroup @@ -19,7 +68,24 @@ type worker struct { flush chan chan bool } -func (p *Plugin) worker(ctx context.Context, metrics *Metrics, table *schema.Table, writeMode WriteMode, ch <-chan arrow.Record, flush <-chan chan bool) { +func NewBatchWriter(tables schema.Tables, client BatchWriterClient, opts ...Option) (*BatchWriter, error) { + c := &BatchWriter{ + tables: tables, + client: client, + workers: make(map[string]*worker), + workersLock: &sync.Mutex{}, + logger: zerolog.Nop(), + batchTimeout: defaultBatchTimeoutSeconds * time.Second, + batchSize: defaultBatchSize, + batchSizeBytes: defaultBatchSizeBytes, + } + for _, opt := range opts { + opt(c) + } + return c, nil +} + +func (w *BatchWriter) worker(ctx context.Context, table *schema.Table, ch <-chan arrow.Record, flush <-chan chan bool) { sizeBytes := int64(0) resources := make([]arrow.Record, 0) for { @@ -27,26 +93,26 @@ func (p *Plugin) worker(ctx context.Context, metrics *Metrics, table *schema.Tab case r, ok := <-ch: if !ok { if len(resources) > 0 { - p.flush(ctx, metrics, table, writeMode, resources) + w.flush(ctx, table, resources) } return } if uint64(len(resources)) == 1000 || sizeBytes+util.TotalRecordSize(r) > int64(1000) { - p.flush(ctx, metrics, table, writeMode, resources) + w.flush(ctx, table, resources) resources = make([]arrow.Record, 0) sizeBytes = 0 } resources = append(resources, r) sizeBytes += util.TotalRecordSize(r) - case <-time.After(p.batchTimeout): + case <-time.After(w.batchTimeout): if len(resources) > 0 { - p.flush(ctx, metrics, table, writeMode, resources) + w.flush(ctx, table, resources) resources = make([]arrow.Record, 0) sizeBytes = 0 } case done := <-flush: if len(resources) > 0 { - p.flush(ctx, metrics, table, writeMode, resources) + w.flush(ctx, table, resources) resources = make([]arrow.Record, 0) sizeBytes = 0 } @@ -58,21 +124,18 @@ func (p *Plugin) worker(ctx context.Context, metrics *Metrics, table *schema.Tab } } -func (p *Plugin) flush(ctx context.Context, metrics *Metrics, table *schema.Table, writeMode WriteMode, resources []arrow.Record) { - resources = p.removeDuplicatesByPK(table, resources) +func (w *BatchWriter) flush(ctx context.Context, table *schema.Table, resources []arrow.Record) { + resources = w.removeDuplicatesByPK(table, resources) start := time.Now() batchSize := len(resources) - if err := p.client.WriteTableBatch(ctx, table, writeMode, resources); err != nil { - p.logger.Err(err).Str("table", table.Name).Int("len", batchSize).Dur("duration", time.Since(start)).Msg("failed to write batch") - // we don't return an error as we need to continue until channel is closed otherwise there will be a deadlock - // atomic.AddUint64(&metrics.Errors, uint64(batchSize)) + if err := w.client.WriteTableBatch(ctx, table, resources); err != nil { + w.logger.Err(err).Str("table", table.Name).Int("len", batchSize).Dur("duration", time.Since(start)).Msg("failed to write batch") } else { - p.logger.Info().Str("table", table.Name).Int("len", batchSize).Dur("duration", time.Since(start)).Msg("batch written successfully") - // atomic.AddUint64(&metrics.Writes, uint64(batchSize)) + w.logger.Info().Str("table", table.Name).Int("len", batchSize).Dur("duration", time.Since(start)).Msg("batch written successfully") } } -func (*Plugin) removeDuplicatesByPK(table *schema.Table, resources []arrow.Record) []arrow.Record { +func (*BatchWriter) removeDuplicatesByPK(table *schema.Table, resources []arrow.Record) []arrow.Record { pkIndices := table.PrimaryKeysIndexes() // special case where there's no PK at all if len(pkIndices) == 0 { @@ -99,18 +162,17 @@ func (*Plugin) removeDuplicatesByPK(table *schema.Table, resources []arrow.Recor return res } -func (p *Plugin) writeManagedTableBatch(ctx context.Context, tables schema.Tables, writeMode WriteMode, res <-chan arrow.Record) error { - workers := make(map[string]*worker, len(tables)) - metrics := &Metrics{} +func (w *BatchWriter) Write(ctx context.Context, res <-chan arrow.Record) error { + workers := make(map[string]*worker, len(w.tables)) - p.workersLock.Lock() - for _, table := range tables { + w.workersLock.Lock() + for _, table := range w.tables { table := table - if p.workers[table.Name] == nil { + if w.workers[table.Name] == nil { ch := make(chan arrow.Record) flush := make(chan chan bool) wg := &sync.WaitGroup{} - p.workers[table.Name] = &worker{ + w.workers[table.Name] = &worker{ count: 1, ch: ch, flush: flush, @@ -119,16 +181,16 @@ func (p *Plugin) writeManagedTableBatch(ctx context.Context, tables schema.Table wg.Add(1) go func() { defer wg.Done() - p.worker(ctx, metrics, table, writeMode, ch, flush) + w.worker(ctx, table, ch, flush) }() } else { - p.workers[table.Name].count++ + w.workers[table.Name].count++ } // we save this locally because we don't want to access the map after that so we can // keep the workersLock for as short as possible - workers[table.Name] = p.workers[table.Name] + workers[table.Name] = w.workers[table.Name] } - p.workersLock.Unlock() + w.workersLock.Unlock() for r := range res { tableName, ok := r.Schema().Metadata().GetValue(schema.MetadataTableName) @@ -153,15 +215,15 @@ func (p *Plugin) writeManagedTableBatch(ctx context.Context, tables schema.Table <-flushChannels[tableName] } - p.workersLock.Lock() + w.workersLock.Lock() for tableName := range workers { - p.workers[tableName].count-- - if p.workers[tableName].count == 0 { - close(p.workers[tableName].ch) - p.workers[tableName].wg.Wait() - delete(p.workers, tableName) + w.workers[tableName].count-- + if w.workers[tableName].count == 0 { + close(w.workers[tableName].ch) + w.workers[tableName].wg.Wait() + delete(w.workers, tableName) } } - p.workersLock.Unlock() + w.workersLock.Unlock() return nil } diff --git a/writers/batch_test.go b/writers/batch_test.go new file mode 100644 index 0000000000..0ca94fc1ad --- /dev/null +++ b/writers/batch_test.go @@ -0,0 +1,56 @@ +package writers + +import ( + "context" + "testing" + + "github.com/apache/arrow/go/v13/arrow" + "github.com/apache/arrow/go/v13/arrow/array" + "github.com/apache/arrow/go/v13/arrow/memory" + "github.com/cloudquery/plugin-sdk/v4/schema" +) + +type testBatchClient struct { +} + +func (c *testBatchClient) WriteTableBatch(ctx context.Context, table *schema.Table, resources []arrow.Record) error { + return nil +} + +func TestBatchWriter(t *testing.T) { + ctx := context.Background() + tables := schema.Tables{ + { + Name: "table1", + Columns: []schema.Column{ + { + Name: "id", + Type: arrow.PrimitiveTypes.Int64, + }, + }, + }, + { + Name: "table2", + Columns: []schema.Column{ + { + Name: "id", + Type: arrow.PrimitiveTypes.Int64, + }, + }, + }, + } + + wr, err := NewBatchWriter(tables, &testBatchClient{}) + if err != nil { + t.Fatal(err) + } + ch := make(chan arrow.Record, 1) + + bldr := array.NewRecordBuilder(memory.DefaultAllocator, tables[0].ToArrowSchema()) + bldr.Field(0).(*array.Int64Builder).Append(1) + ch <- bldr.NewRecord() + close(ch) + if err := wr.Write(ctx, ch); err != nil { + t.Fatal(err) + } +} From e239857aa72386f8769421fad63f865ec3f396dd Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Mon, 5 Jun 2023 23:21:18 +0300 Subject: [PATCH 013/125] wip --- plugin/plugin.go | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/plugin/plugin.go b/plugin/plugin.go index 452d650196..d52fcdf2ae 100644 --- a/plugin/plugin.go +++ b/plugin/plugin.go @@ -7,10 +7,8 @@ import ( "time" "github.com/apache/arrow/go/v13/arrow" - "github.com/cloudquery/plugin-sdk/v4/caser" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/rs/zerolog" - "golang.org/x/sync/semaphore" ) type NewClientFunc func(context.Context, zerolog.Logger, any) (Client, error) @@ -59,14 +57,8 @@ type Plugin struct { newClient NewClientFunc // Logger to call, this logger is passed to the serve.Serve Client, if not defined Serve will create one instead. logger zerolog.Logger - // resourceSem is a semaphore that limits the number of concurrent resources being fetched - resourceSem *semaphore.Weighted - // tableSem is a semaphore that limits the number of concurrent tables being fetched - tableSems []*semaphore.Weighted // maxDepth is the max depth of tables maxDepth uint64 - // caser - caser *caser.Caser // mu is a mutex that limits the number of concurrent init/syncs (can only be one at a time) mu sync.Mutex // client is the initialized session client @@ -78,9 +70,6 @@ type Plugin struct { internalColumns bool // titleTransformer allows the plugin to control how table names get turned into titles for generated documentation titleTransformer func(*schema.Table) string - syncTime time.Time - sourceName string - deterministicCQId bool } const ( @@ -108,7 +97,6 @@ func NewPlugin(name string, version string, newClient NewClientFunc, options ... name: name, version: version, internalColumns: true, - caser: caser.New(), titleTransformer: DefaultTitleTransformer, newClient: newClient, } From 71a960fb89b42c47903c44fd98434926b1c1b971 Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Tue, 6 Jun 2023 01:20:31 +0300 Subject: [PATCH 014/125] more wip --- docs/docs.go | 137 ++++++++++ {plugin => docs}/docs_test.go | 10 +- docs/json.go | 62 +++++ docs/markdown.go | 94 +++++++ .../templates/all_tables.md.go.tpl | 0 .../templates/all_tables_entry.md.go.tpl | 0 {plugin => docs}/templates/table.md.go.tpl | 0 .../TestGeneratePluginDocs-JSON-__tables.json | 0 .../TestGeneratePluginDocs-Markdown-README.md | 0 ...tePluginDocs-Markdown-incremental_table.md | 0 ...Docs-Markdown-relation_relation_table_a.md | 0 ...Docs-Markdown-relation_relation_table_b.md | 0 ...eratePluginDocs-Markdown-relation_table.md | 0 ...tGeneratePluginDocs-Markdown-test_table.md | 0 helpers/internal_columns.go | 2 +- internal/memdb/memdb.go | 1 - internal/servers/plugin/v3/plugin.go | 37 --- plugin/docs.go | 246 ------------------ plugin/options.go | 30 +-- plugin/plugin.go | 2 +- plugin/plugin_reader.go | 1 - .../benchmark_test.go.backup | 0 serve/docs.go | 47 ++++ serve/docs_test.go | 1 + serve/plugin.go | 30 --- transformers/tables.go | 2 +- 26 files changed, 349 insertions(+), 353 deletions(-) create mode 100644 docs/docs.go rename {plugin => docs}/docs_test.go (92%) create mode 100644 docs/json.go create mode 100644 docs/markdown.go rename {plugin => docs}/templates/all_tables.md.go.tpl (100%) rename {plugin => docs}/templates/all_tables_entry.md.go.tpl (100%) rename {plugin => docs}/templates/table.md.go.tpl (100%) rename {plugin => docs}/testdata/TestGeneratePluginDocs-JSON-__tables.json (100%) rename {plugin => docs}/testdata/TestGeneratePluginDocs-Markdown-README.md (100%) rename {plugin => docs}/testdata/TestGeneratePluginDocs-Markdown-incremental_table.md (100%) rename {plugin => docs}/testdata/TestGeneratePluginDocs-Markdown-relation_relation_table_a.md (100%) rename {plugin => docs}/testdata/TestGeneratePluginDocs-Markdown-relation_relation_table_b.md (100%) rename {plugin => docs}/testdata/TestGeneratePluginDocs-Markdown-relation_table.md (100%) rename {plugin => docs}/testdata/TestGeneratePluginDocs-Markdown-test_table.md (100%) delete mode 100644 plugin/docs.go rename {plugin => scheduler}/benchmark_test.go.backup (100%) create mode 100644 serve/docs.go create mode 100644 serve/docs_test.go diff --git a/docs/docs.go b/docs/docs.go new file mode 100644 index 0000000000..62dba4f67b --- /dev/null +++ b/docs/docs.go @@ -0,0 +1,137 @@ +package docs + +import ( + "embed" + "fmt" + "os" + "regexp" + "sort" + + "github.com/cloudquery/plugin-sdk/v4/caser" + "github.com/cloudquery/plugin-sdk/v4/schema" +) + +//go:embed templates/*.go.tpl +var templatesFS embed.FS + +var reMatchNewlines = regexp.MustCompile(`\n{3,}`) +var reMatchHeaders = regexp.MustCompile(`(#{1,6}.+)\n+`) + +var DefaultTitleExceptions = map[string]string{ + // common abbreviations + "acl": "ACL", + "acls": "ACLs", + "api": "API", + "apis": "APIs", + "ca": "CA", + "cidr": "CIDR", + "cidrs": "CIDRs", + "db": "DB", + "dbs": "DBs", + "dhcp": "DHCP", + "iam": "IAM", + "iot": "IOT", + "ip": "IP", + "ips": "IPs", + "ipv4": "IPv4", + "ipv6": "IPv6", + "mfa": "MFA", + "ml": "ML", + "oauth": "OAuth", + "vpc": "VPC", + "vpcs": "VPCs", + "vpn": "VPN", + "vpns": "VPNs", + "waf": "WAF", + "wafs": "WAFs", + + // cloud providers + "aws": "AWS", + "gcp": "GCP", +} + +type Format int + +const ( + FormatMarkdown Format = iota + FormatJSON +) + +func (r Format) String() string { + return [...]string{"markdown", "json"}[r] +} + +func FormatFromString(s string) (Format, error) { + switch s { + case "markdown": + return FormatMarkdown, nil + case "json": + return FormatJSON, nil + default: + return FormatMarkdown, fmt.Errorf("unknown format %s", s) + } +} + +type Generator struct { + tables schema.Tables + titleTransformer func(*schema.Table) string + pluginName string +} + +func DefaultTitleTransformer(table *schema.Table) string { + if table.Title != "" { + return table.Title + } + csr := caser.New(caser.WithCustomExceptions(DefaultTitleExceptions)) + return csr.ToTitle(table.Name) +} + +func sortTables(tables schema.Tables) { + sort.SliceStable(tables, func(i, j int) bool { + return tables[i].Name < tables[j].Name + }) + + for _, table := range tables { + sortTables(table.Relations) + } +} + +// NewGenerator creates a new generator for the given tables. +// The tables are sorted by name. pluginName is optional and is used in markdown only +func NewGenerator(pluginName string, tables schema.Tables) *Generator { + sortedTables := make(schema.Tables, 0, len(tables)) + for _, t := range tables { + sortedTables = append(sortedTables, t.Copy(nil)) + } + sortTables(sortedTables) + + return &Generator{ + tables: sortedTables, + titleTransformer: DefaultTitleTransformer, + pluginName: pluginName, + } +} + +func (g *Generator) Generate(dir string, format Format) error { + if err := os.MkdirAll(dir, os.ModePerm); err != nil { + return err + } + + switch format { + case FormatMarkdown: + return g.renderTablesAsMarkdown(dir) + case FormatJSON: + return g.renderTablesAsJSON(dir) + default: + return fmt.Errorf("unsupported format: %v", format) + } +} + +// setDestinationManagedCqColumns overwrites or adds the CQ columns that are managed by the destination plugins (_cq_sync_time, _cq_source_name). +// func setDestinationManagedCqColumns(tables []*schema.Table) { +// for _, table := range tables { +// table.OverwriteOrAddColumn(&schema.CqSyncTimeColumn) +// table.OverwriteOrAddColumn(&schema.CqSourceNameColumn) +// setDestinationManagedCqColumns(table.Relations) +// } +// } diff --git a/plugin/docs_test.go b/docs/docs_test.go similarity index 92% rename from plugin/docs_test.go rename to docs/docs_test.go index 878e006e88..22d4001719 100644 --- a/plugin/docs_test.go +++ b/docs/docs_test.go @@ -1,6 +1,6 @@ //go:build !windows -package plugin +package docs import ( "os" @@ -9,7 +9,6 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/bradleyjkemp/cupaloy/v2" - pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/cloudquery/plugin-sdk/v4/types" "github.com/stretchr/testify/require" @@ -121,14 +120,13 @@ var testTables = []*schema.Table{ } func TestGeneratePluginDocs(t *testing.T) { - p := NewPlugin("test", "v1.0.0", newTestExecutionClient, WithStaticTables(testTables)) - + g := NewGenerator("test", testTables) cup := cupaloy.New(cupaloy.SnapshotSubdirectory("testdata")) t.Run("Markdown", func(t *testing.T) { tmpdir := t.TempDir() - err := p.GeneratePluginDocs(tmpdir, pbPlugin.GenDocs_FORMAT_MARKDOWN) + err := g.Generate(tmpdir, FormatMarkdown) if err != nil { t.Fatalf("unexpected error calling GeneratePluginDocs: %v", err) } @@ -147,7 +145,7 @@ func TestGeneratePluginDocs(t *testing.T) { t.Run("JSON", func(t *testing.T) { tmpdir := t.TempDir() - err := p.GeneratePluginDocs(tmpdir, pbPlugin.GenDocs_FORMAT_JSON) + err := g.Generate(tmpdir, FormatJSON) if err != nil { t.Fatalf("unexpected error calling GeneratePluginDocs: %v", err) } diff --git a/docs/json.go b/docs/json.go new file mode 100644 index 0000000000..8972a86b8c --- /dev/null +++ b/docs/json.go @@ -0,0 +1,62 @@ +package docs + +import ( + "bytes" + "encoding/json" + "os" + "path/filepath" + + "github.com/cloudquery/plugin-sdk/v4/schema" +) + +type jsonTable struct { + Name string `json:"name"` + Title string `json:"title"` + Description string `json:"description"` + Columns []jsonColumn `json:"columns"` + Relations []jsonTable `json:"relations"` +} + +type jsonColumn struct { + Name string `json:"name"` + Type string `json:"type"` + IsPrimaryKey bool `json:"is_primary_key,omitempty"` + IsIncrementalKey bool `json:"is_incremental_key,omitempty"` +} + +func (g *Generator) renderTablesAsJSON(dir string) error { + jsonTables := g.jsonifyTables(g.tables) + buffer := &bytes.Buffer{} + m := json.NewEncoder(buffer) + m.SetIndent("", " ") + m.SetEscapeHTML(false) + err := m.Encode(jsonTables) + if err != nil { + return err + } + outputPath := filepath.Join(dir, "__tables.json") + return os.WriteFile(outputPath, buffer.Bytes(), 0644) +} + +func (g *Generator) jsonifyTables(tables schema.Tables) []jsonTable { + jsonTables := make([]jsonTable, len(tables)) + for i, table := range tables { + jsonColumns := make([]jsonColumn, len(table.Columns)) + for c, col := range table.Columns { + jsonColumns[c] = jsonColumn{ + Name: col.Name, + Type: col.Type.String(), + IsPrimaryKey: col.PrimaryKey, + IsIncrementalKey: col.IncrementalKey, + } + } + jsonTables[i] = jsonTable{ + Name: table.Name, + Title: g.titleTransformer(table), + Description: table.Description, + Columns: jsonColumns, + Relations: g.jsonifyTables(table.Relations), + } + } + return jsonTables +} diff --git a/docs/markdown.go b/docs/markdown.go new file mode 100644 index 0000000000..6f8fe9dcaa --- /dev/null +++ b/docs/markdown.go @@ -0,0 +1,94 @@ +package docs + +import ( + "bytes" + "fmt" + "os" + "path/filepath" + "text/template" + + "github.com/cloudquery/plugin-sdk/v4/schema" +) + +type templateData struct { + PluginName string + Tables schema.Tables +} + +func (g *Generator) renderTablesAsMarkdown(dir string) error { + for _, table := range g.tables { + if err := g.renderAllTables(dir, table); err != nil { + return err + } + } + t, err := template.New("all_tables.md.go.tpl").Funcs(template.FuncMap{ + "indentToDepth": indentToDepth, + }).ParseFS(templatesFS, "templates/all_tables*.md.go.tpl") + if err != nil { + return fmt.Errorf("failed to parse template for README.md: %v", err) + } + + var b bytes.Buffer + if err := t.Execute(&b, templateData{PluginName: g.pluginName, Tables: g.tables}); err != nil { + return fmt.Errorf("failed to execute template: %v", err) + } + content := formatMarkdown(b.String()) + outputPath := filepath.Join(dir, "README.md") + f, err := os.Create(outputPath) + if err != nil { + return fmt.Errorf("failed to create file %v: %v", outputPath, err) + } + f.WriteString(content) + return nil +} + +func (g *Generator) renderAllTables(dir string, t *schema.Table) error { + if err := g.renderTable(dir, t); err != nil { + return err + } + for _, r := range t.Relations { + if err := g.renderAllTables(dir, r); err != nil { + return err + } + } + return nil +} + +func (g *Generator) renderTable(dir string, table *schema.Table) error { + t := template.New("").Funcs(map[string]any{ + "title": g.titleTransformer, + }) + t, err := t.New("table.md.go.tpl").ParseFS(templatesFS, "templates/table.md.go.tpl") + if err != nil { + return fmt.Errorf("failed to parse template: %v", err) + } + + outputPath := filepath.Join(dir, fmt.Sprintf("%s.md", table.Name)) + + var b bytes.Buffer + if err := t.Execute(&b, table); err != nil { + return fmt.Errorf("failed to execute template: %v", err) + } + content := formatMarkdown(b.String()) + f, err := os.Create(outputPath) + if err != nil { + return fmt.Errorf("failed to create file %v: %v", outputPath, err) + } + f.WriteString(content) + return f.Close() +} + +func formatMarkdown(s string) string { + s = reMatchNewlines.ReplaceAllString(s, "\n\n") + return reMatchHeaders.ReplaceAllString(s, `$1`+"\n\n") +} + +func indentToDepth(table *schema.Table) string { + s := "" + t := table + for t.Parent != nil { + s += " " + t = t.Parent + } + return s +} diff --git a/plugin/templates/all_tables.md.go.tpl b/docs/templates/all_tables.md.go.tpl similarity index 100% rename from plugin/templates/all_tables.md.go.tpl rename to docs/templates/all_tables.md.go.tpl diff --git a/plugin/templates/all_tables_entry.md.go.tpl b/docs/templates/all_tables_entry.md.go.tpl similarity index 100% rename from plugin/templates/all_tables_entry.md.go.tpl rename to docs/templates/all_tables_entry.md.go.tpl diff --git a/plugin/templates/table.md.go.tpl b/docs/templates/table.md.go.tpl similarity index 100% rename from plugin/templates/table.md.go.tpl rename to docs/templates/table.md.go.tpl diff --git a/plugin/testdata/TestGeneratePluginDocs-JSON-__tables.json b/docs/testdata/TestGeneratePluginDocs-JSON-__tables.json similarity index 100% rename from plugin/testdata/TestGeneratePluginDocs-JSON-__tables.json rename to docs/testdata/TestGeneratePluginDocs-JSON-__tables.json diff --git a/plugin/testdata/TestGeneratePluginDocs-Markdown-README.md b/docs/testdata/TestGeneratePluginDocs-Markdown-README.md similarity index 100% rename from plugin/testdata/TestGeneratePluginDocs-Markdown-README.md rename to docs/testdata/TestGeneratePluginDocs-Markdown-README.md diff --git a/plugin/testdata/TestGeneratePluginDocs-Markdown-incremental_table.md b/docs/testdata/TestGeneratePluginDocs-Markdown-incremental_table.md similarity index 100% rename from plugin/testdata/TestGeneratePluginDocs-Markdown-incremental_table.md rename to docs/testdata/TestGeneratePluginDocs-Markdown-incremental_table.md diff --git a/plugin/testdata/TestGeneratePluginDocs-Markdown-relation_relation_table_a.md b/docs/testdata/TestGeneratePluginDocs-Markdown-relation_relation_table_a.md similarity index 100% rename from plugin/testdata/TestGeneratePluginDocs-Markdown-relation_relation_table_a.md rename to docs/testdata/TestGeneratePluginDocs-Markdown-relation_relation_table_a.md diff --git a/plugin/testdata/TestGeneratePluginDocs-Markdown-relation_relation_table_b.md b/docs/testdata/TestGeneratePluginDocs-Markdown-relation_relation_table_b.md similarity index 100% rename from plugin/testdata/TestGeneratePluginDocs-Markdown-relation_relation_table_b.md rename to docs/testdata/TestGeneratePluginDocs-Markdown-relation_relation_table_b.md diff --git a/plugin/testdata/TestGeneratePluginDocs-Markdown-relation_table.md b/docs/testdata/TestGeneratePluginDocs-Markdown-relation_table.md similarity index 100% rename from plugin/testdata/TestGeneratePluginDocs-Markdown-relation_table.md rename to docs/testdata/TestGeneratePluginDocs-Markdown-relation_table.md diff --git a/plugin/testdata/TestGeneratePluginDocs-Markdown-test_table.md b/docs/testdata/TestGeneratePluginDocs-Markdown-test_table.md similarity index 100% rename from plugin/testdata/TestGeneratePluginDocs-Markdown-test_table.md rename to docs/testdata/TestGeneratePluginDocs-Markdown-test_table.md diff --git a/helpers/internal_columns.go b/helpers/internal_columns.go index 12668d607f..345b806ac4 100644 --- a/helpers/internal_columns.go +++ b/helpers/internal_columns.go @@ -1 +1 @@ -package helpers \ No newline at end of file +package helpers diff --git a/internal/memdb/memdb.go b/internal/memdb/memdb.go index 13ad7f74b9..a23316939b 100644 --- a/internal/memdb/memdb.go +++ b/internal/memdb/memdb.go @@ -155,7 +155,6 @@ func (c *client) Write(ctx context.Context, options plugin.WriteOptions, resourc return nil } - func (c *client) Close(context.Context) error { c.memoryDB = nil return nil diff --git a/internal/servers/plugin/v3/plugin.go b/internal/servers/plugin/v3/plugin.go index 00e2543d99..92c7c27cc1 100644 --- a/internal/servers/plugin/v3/plugin.go +++ b/internal/servers/plugin/v3/plugin.go @@ -6,8 +6,6 @@ import ( "errors" "fmt" "io" - "os" - "path/filepath" "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/ipc" @@ -44,7 +42,6 @@ func (s *Server) GetTables(context.Context, *pb.GetTables_Request) (*pb.GetTable }, nil } - func (s *Server) GetName(context.Context, *pb.GetName_Request) (*pb.GetName_Response, error) { return &pb.GetName_Response{ Name: s.Plugin.Name(), @@ -248,40 +245,6 @@ func (s *Server) Write(msg pb.Plugin_WriteServer) error { } } -func (s *Server) GenDocs(req *pb.GenDocs_Request, srv pb.Plugin_GenDocsServer) error { - tmpDir, err := os.MkdirTemp("", "cloudquery-docs") - if err != nil { - return fmt.Errorf("failed to create tmp dir: %w", err) - } - defer os.RemoveAll(tmpDir) - err = s.Plugin.GeneratePluginDocs(tmpDir, req.Format) - if err != nil { - return fmt.Errorf("failed to generate docs: %w", err) - } - - // list files in tmpDir - files, err := os.ReadDir(tmpDir) - if err != nil { - return fmt.Errorf("failed to read tmp dir: %w", err) - } - for _, f := range files { - if f.IsDir() { - continue - } - content, err := os.ReadFile(filepath.Join(tmpDir, f.Name())) - if err != nil { - return fmt.Errorf("failed to read file: %w", err) - } - if err := srv.Send(&pb.GenDocs_Response{ - Filename: f.Name(), - Content: content, - }); err != nil { - return fmt.Errorf("failed to send file: %w", err) - } - } - return nil -} - func checkMessageSize(msg proto.Message, record arrow.Record) error { size := proto.Size(msg) // log error to Sentry if row exceeds half of the max size diff --git a/plugin/docs.go b/plugin/docs.go deleted file mode 100644 index 6e4dccf581..0000000000 --- a/plugin/docs.go +++ /dev/null @@ -1,246 +0,0 @@ -package plugin - -import ( - "bytes" - "context" - "embed" - "encoding/json" - "fmt" - "os" - "path/filepath" - "regexp" - "sort" - "text/template" - - pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" - "github.com/cloudquery/plugin-sdk/v4/caser" - "github.com/cloudquery/plugin-sdk/v4/schema" -) - -//go:embed templates/*.go.tpl -var templatesFS embed.FS - -var reMatchNewlines = regexp.MustCompile(`\n{3,}`) -var reMatchHeaders = regexp.MustCompile(`(#{1,6}.+)\n+`) - -var DefaultTitleExceptions = map[string]string{ - // common abbreviations - "acl": "ACL", - "acls": "ACLs", - "api": "API", - "apis": "APIs", - "ca": "CA", - "cidr": "CIDR", - "cidrs": "CIDRs", - "db": "DB", - "dbs": "DBs", - "dhcp": "DHCP", - "iam": "IAM", - "iot": "IOT", - "ip": "IP", - "ips": "IPs", - "ipv4": "IPv4", - "ipv6": "IPv6", - "mfa": "MFA", - "ml": "ML", - "oauth": "OAuth", - "vpc": "VPC", - "vpcs": "VPCs", - "vpn": "VPN", - "vpns": "VPNs", - "waf": "WAF", - "wafs": "WAFs", - - // cloud providers - "aws": "AWS", - "gcp": "GCP", -} - -func DefaultTitleTransformer(table *schema.Table) string { - if table.Title != "" { - return table.Title - } - csr := caser.New(caser.WithCustomExceptions(DefaultTitleExceptions)) - return csr.ToTitle(table.Name) -} - -func sortTables(tables schema.Tables) { - sort.SliceStable(tables, func(i, j int) bool { - return tables[i].Name < tables[j].Name - }) - - for _, table := range tables { - sortTables(table.Relations) - } -} - -type templateData struct { - PluginName string - Tables schema.Tables -} - -// GeneratePluginDocs creates table documentation for the source plugin based on its list of tables -func (p *Plugin) GeneratePluginDocs(dir string, format pbPlugin.GenDocs_FORMAT) error { - if err := os.MkdirAll(dir, os.ModePerm); err != nil { - return err - } - tables, err := p.Tables(context.Background()) - if err != nil { - return err - } - setDestinationManagedCqColumns(tables) - - sortedTables := make(schema.Tables, 0, len(tables)) - for _, t := range tables { - sortedTables = append(sortedTables, t.Copy(nil)) - } - sortTables(sortedTables) - - switch format { - case pbPlugin.GenDocs_FORMAT_MARKDOWN: - return p.renderTablesAsMarkdown(dir, p.name, sortedTables) - case pbPlugin.GenDocs_FORMAT_JSON: - return p.renderTablesAsJSON(dir, sortedTables) - default: - return fmt.Errorf("unsupported format: %v", format) - } -} - -// setDestinationManagedCqColumns overwrites or adds the CQ columns that are managed by the destination plugins (_cq_sync_time, _cq_source_name). -func setDestinationManagedCqColumns(tables []*schema.Table) { - for _, table := range tables { - table.OverwriteOrAddColumn(&schema.CqSyncTimeColumn) - table.OverwriteOrAddColumn(&schema.CqSourceNameColumn) - setDestinationManagedCqColumns(table.Relations) - } -} - -type jsonTable struct { - Name string `json:"name"` - Title string `json:"title"` - Description string `json:"description"` - Columns []jsonColumn `json:"columns"` - Relations []jsonTable `json:"relations"` -} - -type jsonColumn struct { - Name string `json:"name"` - Type string `json:"type"` - IsPrimaryKey bool `json:"is_primary_key,omitempty"` - IsIncrementalKey bool `json:"is_incremental_key,omitempty"` -} - -func (p *Plugin) renderTablesAsJSON(dir string, tables schema.Tables) error { - jsonTables := p.jsonifyTables(tables) - buffer := &bytes.Buffer{} - m := json.NewEncoder(buffer) - m.SetIndent("", " ") - m.SetEscapeHTML(false) - err := m.Encode(jsonTables) - if err != nil { - return err - } - outputPath := filepath.Join(dir, "__tables.json") - return os.WriteFile(outputPath, buffer.Bytes(), 0644) -} - -func (p *Plugin) jsonifyTables(tables schema.Tables) []jsonTable { - jsonTables := make([]jsonTable, len(tables)) - for i, table := range tables { - jsonColumns := make([]jsonColumn, len(table.Columns)) - for c, col := range table.Columns { - jsonColumns[c] = jsonColumn{ - Name: col.Name, - Type: col.Type.String(), - IsPrimaryKey: col.PrimaryKey, - IsIncrementalKey: col.IncrementalKey, - } - } - jsonTables[i] = jsonTable{ - Name: table.Name, - Title: p.titleTransformer(table), - Description: table.Description, - Columns: jsonColumns, - Relations: p.jsonifyTables(table.Relations), - } - } - return jsonTables -} - -func (p *Plugin) renderTablesAsMarkdown(dir string, pluginName string, tables schema.Tables) error { - for _, table := range tables { - if err := p.renderAllTables(table, dir); err != nil { - return err - } - } - t, err := template.New("all_tables.md.go.tpl").Funcs(template.FuncMap{ - "indentToDepth": indentToDepth, - }).ParseFS(templatesFS, "templates/all_tables*.md.go.tpl") - if err != nil { - return fmt.Errorf("failed to parse template for README.md: %v", err) - } - - var b bytes.Buffer - if err := t.Execute(&b, templateData{PluginName: pluginName, Tables: tables}); err != nil { - return fmt.Errorf("failed to execute template: %v", err) - } - content := formatMarkdown(b.String()) - outputPath := filepath.Join(dir, "README.md") - f, err := os.Create(outputPath) - if err != nil { - return fmt.Errorf("failed to create file %v: %v", outputPath, err) - } - f.WriteString(content) - return nil -} - -func (p *Plugin) renderAllTables(t *schema.Table, dir string) error { - if err := p.renderTable(t, dir); err != nil { - return err - } - for _, r := range t.Relations { - if err := p.renderAllTables(r, dir); err != nil { - return err - } - } - return nil -} - -func (p *Plugin) renderTable(table *schema.Table, dir string) error { - t := template.New("").Funcs(map[string]any{ - "title": p.titleTransformer, - }) - t, err := t.New("table.md.go.tpl").ParseFS(templatesFS, "templates/table.md.go.tpl") - if err != nil { - return fmt.Errorf("failed to parse template: %v", err) - } - - outputPath := filepath.Join(dir, fmt.Sprintf("%s.md", table.Name)) - - var b bytes.Buffer - if err := t.Execute(&b, table); err != nil { - return fmt.Errorf("failed to execute template: %v", err) - } - content := formatMarkdown(b.String()) - f, err := os.Create(outputPath) - if err != nil { - return fmt.Errorf("failed to create file %v: %v", outputPath, err) - } - f.WriteString(content) - return f.Close() -} - -func formatMarkdown(s string) string { - s = reMatchNewlines.ReplaceAllString(s, "\n\n") - return reMatchHeaders.ReplaceAllString(s, `$1`+"\n\n") -} - -func indentToDepth(table *schema.Table) string { - s := "" - t := table - for t.Parent != nil { - s += " " - t = t.Parent - } - return s -} diff --git a/plugin/options.go b/plugin/options.go index aaa8687a51..ebdf1329f4 100644 --- a/plugin/options.go +++ b/plugin/options.go @@ -1,8 +1,6 @@ package plugin import ( - "fmt" - "github.com/cloudquery/plugin-sdk/v4/schema" ) @@ -21,31 +19,6 @@ func (m MigrateMode) String() string { return migrateModeStrings[m] } -type Registry int - -const ( - RegistryGithub Registry = iota - RegistryLocal - RegistryGrpc -) - -func (r Registry) String() string { - return [...]string{"github", "local", "grpc"}[r] -} - -func RegistryFromString(s string) (Registry, error) { - switch s { - case "github": - return RegistryGithub, nil - case "local": - return RegistryLocal, nil - case "grpc": - return RegistryGrpc, nil - default: - return RegistryGithub, fmt.Errorf("unknown registry %s", s) - } -} - type WriteMode int const ( @@ -64,7 +37,6 @@ func (m WriteMode) String() string { type Option func(*Plugin) - // WithNoInternalColumns won't add internal columns (_cq_id, _cq_parent_cq_id) to the plugin tables func WithNoInternalColumns() Option { return func(p *Plugin) { @@ -78,4 +50,4 @@ func WithTitleTransformer(t func(*schema.Table) string) Option { return func(p *Plugin) { p.titleTransformer = t } -} \ No newline at end of file +} diff --git a/plugin/plugin.go b/plugin/plugin.go index d52fcdf2ae..05dfcff7ee 100644 --- a/plugin/plugin.go +++ b/plugin/plugin.go @@ -69,7 +69,7 @@ type Plugin struct { // useful for sources such as PostgreSQL and other databases internalColumns bool // titleTransformer allows the plugin to control how table names get turned into titles for generated documentation - titleTransformer func(*schema.Table) string + titleTransformer func(*schema.Table) string } const ( diff --git a/plugin/plugin_reader.go b/plugin/plugin_reader.go index da76a3ed97..57f9f52bea 100644 --- a/plugin/plugin_reader.go +++ b/plugin/plugin_reader.go @@ -48,7 +48,6 @@ func NewReadOnlyPlugin(name string, version string, newClient NewReadOnlyClientF return NewPlugin(name, version, newClientWrapper, options...) } - func (p *Plugin) syncAll(ctx context.Context, options SyncOptions) ([]arrow.Record, error) { var err error ch := make(chan arrow.Record) diff --git a/plugin/benchmark_test.go.backup b/scheduler/benchmark_test.go.backup similarity index 100% rename from plugin/benchmark_test.go.backup rename to scheduler/benchmark_test.go.backup diff --git a/serve/docs.go b/serve/docs.go new file mode 100644 index 0000000000..442b6308f1 --- /dev/null +++ b/serve/docs.go @@ -0,0 +1,47 @@ +package serve + +import ( + "fmt" + "strings" + + "github.com/cloudquery/plugin-sdk/v4/docs" + "github.com/spf13/cobra" +) + +const ( + pluginDocShort = "Generate documentation for tables" + pluginDocLong = `Generate documentation for tables + +If format is markdown, a destination directory will be created (if necessary) containing markdown files. +Example: +doc ./output + +If format is JSON, a destination directory will be created (if necessary) with a single json file called __tables.json. +Example: +doc --format json . +` +) + +func (s *PluginServe) newCmdPluginDoc() *cobra.Command { + format := newEnum([]string{"json", "markdown"}, "markdown") + cmd := &cobra.Command{ + Use: "doc ", + Short: pluginDocShort, + Long: pluginDocLong, + Args: cobra.ExactArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + tables, err := s.plugin.Tables(cmd.Context()) + if err != nil { + return err + } + g := docs.NewGenerator(s.plugin.Name(), tables) + f := docs.FormatMarkdown + if format.Value == "json" { + f = docs.FormatJSON + } + return g.Generate(args[0], f) + }, + } + cmd.Flags().Var(format, "format", fmt.Sprintf("output format. one of: %s", strings.Join(format.Allowed, ","))) + return cmd +} diff --git a/serve/docs_test.go b/serve/docs_test.go new file mode 100644 index 0000000000..9b65230168 --- /dev/null +++ b/serve/docs_test.go @@ -0,0 +1 @@ +package serve diff --git a/serve/plugin.go b/serve/plugin.go index f64d0ba1ec..9c55830987 100644 --- a/serve/plugin.go +++ b/serve/plugin.go @@ -245,36 +245,6 @@ func (s *PluginServe) newCmdPluginServe() *cobra.Command { return cmd } -const ( - pluginDocShort = "Generate documentation for tables" - pluginDocLong = `Generate documentation for tables - -If format is markdown, a destination directory will be created (if necessary) containing markdown files. -Example: -doc ./output - -If format is JSON, a destination directory will be created (if necessary) with a single json file called __tables.json. -Example: -doc --format json . -` -) - -func (s *PluginServe) newCmdPluginDoc() *cobra.Command { - format := newEnum([]string{"json", "markdown"}, "markdown") - cmd := &cobra.Command{ - Use: "doc ", - Short: pluginDocShort, - Long: pluginDocLong, - Args: cobra.ExactArgs(1), - RunE: func(cmd *cobra.Command, args []string) error { - pbFormat := pbv3.GenDocs_FORMAT(pbv3.GenDocs_FORMAT_value[format.Value]) - return s.plugin.GeneratePluginDocs(args[0], pbFormat) - }, - } - cmd.Flags().Var(format, "format", fmt.Sprintf("output format. one of: %s", strings.Join(format.Allowed, ","))) - return cmd -} - func (s *PluginServe) newCmdPluginRoot() *cobra.Command { cmd := &cobra.Command{ Use: fmt.Sprintf("%s ", s.plugin.Name()), diff --git a/transformers/tables.go b/transformers/tables.go index 94532c6ca3..99b563e2e5 100644 --- a/transformers/tables.go +++ b/transformers/tables.go @@ -55,4 +55,4 @@ func TransformTables(tables schema.Tables) error { } } return nil -} \ No newline at end of file +} From b66e12a0a6278ff131c6d38b40ae646774e7bbdb Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Sat, 10 Jun 2023 17:42:24 +0300 Subject: [PATCH 015/125] more wip --- cover | 199 +++++++ docs/{docs.go => generator.go} | 0 docs/{docs_test.go => generator_test.go} | 0 .../TestGeneratePluginDocs-JSON-__tables.json | 100 ---- ...tePluginDocs-Markdown-incremental_table.md | 4 - ...Docs-Markdown-relation_relation_table_a.md | 6 +- ...Docs-Markdown-relation_relation_table_b.md | 6 +- ...eratePluginDocs-Markdown-relation_table.md | 6 +- ...tGeneratePluginDocs-Markdown-test_table.md | 4 - go.mod | 7 +- go.sum | 7 +- plugin/messages.go | 84 +++ plugin/options.go | 19 - plugin/plugin.go | 49 +- plugin/plugin_reader.go | 22 +- plugin/plugin_writer.go | 56 +- plugin/testing_overwrite_deletestale.go | 168 ------ plugin/testing_sync.go | 18 +- plugin/testing_upsert.go | 69 +++ plugin/testing_write.go | 243 ++------- plugin/testing_write_append.go | 95 ---- plugin/testing_write_delete.go | 84 +++ plugin/testing_write_insert.go | 68 +++ plugin/testing_write_migrate.go | 177 +++---- plugin/testing_write_overwrite.go | 115 ----- plugin/testing_write_upsert.go | 69 +++ scheduler/benchmark_test.go | 1 + scheduler/metrics.go | 1 + .../plugin_managed_source_test.go.backup | 484 ------------------ scheduler/scheduler.go | 26 +- scheduler/scheduler_round_robin_test.go | 7 - scheduler/scheduler_test.go | 278 ++++++++++ schema/resource.go | 5 + serve/docs_test.go | 19 + serve/plugin_test.go | 93 +--- ...ate_v3_test.go => state_v3_test.go.backup} | 0 transformers/tables.go | 28 - 37 files changed, 1056 insertions(+), 1561 deletions(-) create mode 100644 cover rename docs/{docs.go => generator.go} (100%) rename docs/{docs_test.go => generator_test.go} (100%) create mode 100644 plugin/messages.go delete mode 100644 plugin/testing_overwrite_deletestale.go create mode 100644 plugin/testing_upsert.go delete mode 100644 plugin/testing_write_append.go create mode 100644 plugin/testing_write_delete.go create mode 100644 plugin/testing_write_insert.go delete mode 100644 plugin/testing_write_overwrite.go create mode 100644 plugin/testing_write_upsert.go create mode 100644 scheduler/benchmark_test.go delete mode 100644 scheduler/plugin_managed_source_test.go.backup create mode 100644 scheduler/scheduler_test.go rename serve/{state_v3_test.go => state_v3_test.go.backup} (100%) diff --git a/cover b/cover new file mode 100644 index 0000000000..5fb4e3be13 --- /dev/null +++ b/cover @@ -0,0 +1,199 @@ +mode: set +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:45.37,47.30 2 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:53.2,53.24 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:47.30,48.12 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:51.3,51.41 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:48.12,50.4 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:56.44,58.2 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:62.47,63.28 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:63.28,65.3 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:68.59,69.28 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:69.28,71.3 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:74.49,75.28 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:75.28,77.3 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:80.63,81.28 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:81.28,83.3 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:104.94,113.27 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:116.2,116.11 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:113.27,115.3 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:119.78,121.12 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:132.2,132.34 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:139.2,139.12 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:121.12,123.21 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:124.21,125.29 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:126.28,127.36 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:128.11,129.57 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:132.34,138.3 5 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:142.86,144.31 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:144.31,148.3 3 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:151.157,160.15 9 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:171.2,171.38 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:185.2,185.34 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:189.2,189.39 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:201.2,202.17 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:160.15,161.35 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:161.35,165.47 4 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:165.47,168.5 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:171.38,172.74 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:172.74,175.38 3 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:181.4,181.14 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:175.38,176.48 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:176.48,179.6 2 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:185.34,187.3 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:189.39,190.75 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:190.75,193.38 3 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:193.38,194.48 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:194.48,197.6 2 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:205.183,208.15 3 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:221.2,221.23 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:208.15,209.35 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:209.35,213.47 4 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:213.47,217.5 3 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:221.23,222.62 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:222.62,225.38 3 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:225.38,226.48 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:226.48,230.6 3 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:233.8,236.15 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:236.15,238.18 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:238.18,241.39 3 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:241.39,242.49 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:242.49,246.7 3 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:253.44,255.22 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:258.2,258.31 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:264.2,264.14 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:255.22,257.3 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:258.31,260.23 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:260.23,262.4 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:269.30,270.11 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:273.2,273.10 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:270.11,272.3 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:17.93,24.42 4 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:29.2,34.33 3 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:58.2,59.33 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:81.2,81.11 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:24.42,28.3 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:34.33,36.29 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:40.3,41.29 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:52.3,55.44 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:36.29,38.4 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:41.29,42.41 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:42.41,44.5 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:44.10,45.48 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:49.5,49.110 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:45.48,48.6 2 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:59.33,62.34 3 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:62.34,64.57 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:69.4,70.14 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:64.57,68.5 2 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:70.14,76.5 3 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:84.184,89.19 4 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:92.2,95.12 3 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:121.2,121.21 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:126.2,126.19 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:89.19,91.3 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:95.12,96.16 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:108.3,108.66 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:96.16,97.36 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:106.4,106.14 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:97.36,99.48 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:103.5,104.46 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:99.48,102.6 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:108.66,111.38 3 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:117.4,117.10 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:111.38,112.48 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:112.48,115.6 2 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:121.21,123.3 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:126.19,129.3 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:132.203,134.30 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:137.2,138.12 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:194.2,195.38 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:213.2,213.11 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:134.30,136.3 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:138.12,142.33 4 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:191.3,191.12 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:142.33,144.56 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:150.4,151.14 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:144.56,149.5 3 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:151.14,156.32 4 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:160.5,160.79 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:174.5,174.55 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:188.5,188.38 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:156.32,158.6 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:160.79,163.86 3 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:171.6,172.12 2 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:163.86,166.50 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:166.50,169.8 2 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:174.55,177.86 3 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:185.6,186.12 2 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:177.86,180.50 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:180.50,183.8 2 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:195.38,198.53 3 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:198.53,200.61 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:205.4,206.14 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:200.61,204.5 2 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:206.14,210.5 3 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_round_robin.go:16.100,21.42 4 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_round_robin.go:26.2,31.33 3 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_round_robin.go:42.2,45.34 3 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_round_robin.go:66.2,66.11 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_round_robin.go:21.42,25.3 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_round_robin.go:31.33,33.29 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_round_robin.go:36.3,39.44 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_round_robin.go:33.29,35.4 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_round_robin.go:45.34,48.56 3 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_round_robin.go:53.3,54.13 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_round_robin.go:48.56,52.4 2 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_round_robin.go:54.13,62.4 3 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_round_robin.go:71.108,74.6 3 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_round_robin.go:87.2,87.21 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_round_robin.go:74.6,76.32 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_round_robin.go:82.3,83.16 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_round_robin.go:76.32,77.41 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_round_robin.go:77.41,80.5 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_round_robin.go:83.16,84.9 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:23.68,25.2 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:28.46,29.48 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:42.2,42.52 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:55.2,55.13 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:29.48,30.42 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:30.42,31.46 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:34.4,34.54 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:37.4,37.54 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:31.46,33.5 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:34.54,36.5 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:37.54,39.5 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:42.52,43.42 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:43.42,44.42 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:47.4,47.50 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:50.4,50.50 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:44.42,46.5 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:47.50,49.5 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:50.50,52.5 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:58.85,60.33 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:63.2,63.43 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:60.33,62.3 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:63.43,65.3 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:68.40,70.46 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:75.2,75.14 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:70.46,71.41 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:71.41,73.4 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:78.46,80.46 2 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:85.2,85.14 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:80.46,81.41 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:81.41,83.4 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:88.40,90.46 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:95.2,95.14 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:90.46,91.41 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:91.41,93.4 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:98.46,100.46 2 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:105.2,105.14 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:100.46,101.41 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:101.41,103.4 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:108.43,110.46 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:115.2,115.14 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:110.46,111.41 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:111.41,113.4 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:118.49,120.46 2 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:125.2,125.14 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:120.46,121.41 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:121.41,123.4 1 0 diff --git a/docs/docs.go b/docs/generator.go similarity index 100% rename from docs/docs.go rename to docs/generator.go diff --git a/docs/docs_test.go b/docs/generator_test.go similarity index 100% rename from docs/docs_test.go rename to docs/generator_test.go diff --git a/docs/testdata/TestGeneratePluginDocs-JSON-__tables.json b/docs/testdata/TestGeneratePluginDocs-JSON-__tables.json index 7a8280833e..2623746cb5 100644 --- a/docs/testdata/TestGeneratePluginDocs-JSON-__tables.json +++ b/docs/testdata/TestGeneratePluginDocs-JSON-__tables.json @@ -4,22 +4,6 @@ "title": "Incremental Table", "description": "Description for incremental table", "columns": [ - { - "name": "_cq_source_name", - "type": "utf8" - }, - { - "name": "_cq_sync_time", - "type": "timestamp[us, tz=UTC]" - }, - { - "name": "_cq_id", - "type": "uuid" - }, - { - "name": "_cq_parent_id", - "type": "uuid" - }, { "name": "int_col", "type": "int64" @@ -43,22 +27,6 @@ "title": "Test Table", "description": "Description for test table", "columns": [ - { - "name": "_cq_source_name", - "type": "utf8" - }, - { - "name": "_cq_sync_time", - "type": "timestamp[us, tz=UTC]" - }, - { - "name": "_cq_id", - "type": "uuid" - }, - { - "name": "_cq_parent_id", - "type": "uuid" - }, { "name": "int_col", "type": "int64" @@ -96,23 +64,6 @@ "title": "Relation Table", "description": "Description for relational table", "columns": [ - { - "name": "_cq_source_name", - "type": "utf8" - }, - { - "name": "_cq_sync_time", - "type": "timestamp[us, tz=UTC]" - }, - { - "name": "_cq_id", - "type": "uuid", - "is_primary_key": true - }, - { - "name": "_cq_parent_id", - "type": "uuid" - }, { "name": "string_col", "type": "utf8" @@ -124,23 +75,6 @@ "title": "Relation Relation Table A", "description": "Description for relational table's relation", "columns": [ - { - "name": "_cq_source_name", - "type": "utf8" - }, - { - "name": "_cq_sync_time", - "type": "timestamp[us, tz=UTC]" - }, - { - "name": "_cq_id", - "type": "uuid", - "is_primary_key": true - }, - { - "name": "_cq_parent_id", - "type": "uuid" - }, { "name": "string_col", "type": "utf8" @@ -153,23 +87,6 @@ "title": "Relation Relation Table B", "description": "Description for relational table's relation", "columns": [ - { - "name": "_cq_source_name", - "type": "utf8" - }, - { - "name": "_cq_sync_time", - "type": "timestamp[us, tz=UTC]" - }, - { - "name": "_cq_id", - "type": "uuid", - "is_primary_key": true - }, - { - "name": "_cq_parent_id", - "type": "uuid" - }, { "name": "string_col", "type": "utf8" @@ -184,23 +101,6 @@ "title": "Relation Table2", "description": "Description for second relational table", "columns": [ - { - "name": "_cq_source_name", - "type": "utf8" - }, - { - "name": "_cq_sync_time", - "type": "timestamp[us, tz=UTC]" - }, - { - "name": "_cq_id", - "type": "uuid", - "is_primary_key": true - }, - { - "name": "_cq_parent_id", - "type": "uuid" - }, { "name": "string_col", "type": "utf8" diff --git a/docs/testdata/TestGeneratePluginDocs-Markdown-incremental_table.md b/docs/testdata/TestGeneratePluginDocs-Markdown-incremental_table.md index 67ca4b8539..4148e838eb 100644 --- a/docs/testdata/TestGeneratePluginDocs-Markdown-incremental_table.md +++ b/docs/testdata/TestGeneratePluginDocs-Markdown-incremental_table.md @@ -11,10 +11,6 @@ It supports incremental syncs based on the (**id_col**, **id_col2**) columns. | Name | Type | | ------------- | ------------- | -|_cq_source_name|utf8| -|_cq_sync_time|timestamp[us, tz=UTC]| -|_cq_id|uuid| -|_cq_parent_id|uuid| |int_col|int64| |id_col (PK) (Incremental Key)|int64| |id_col2 (Incremental Key)|int64| diff --git a/docs/testdata/TestGeneratePluginDocs-Markdown-relation_relation_table_a.md b/docs/testdata/TestGeneratePluginDocs-Markdown-relation_relation_table_a.md index 038791b13e..1c0b8b63c8 100644 --- a/docs/testdata/TestGeneratePluginDocs-Markdown-relation_relation_table_a.md +++ b/docs/testdata/TestGeneratePluginDocs-Markdown-relation_relation_table_a.md @@ -4,7 +4,7 @@ This table shows data for Relation Relation Table A. Description for relational table's relation -The primary key for this table is **_cq_id**. +The composite primary key for this table is (). ## Relations @@ -14,8 +14,4 @@ This table depends on [relation_table](relation_table.md). | Name | Type | | ------------- | ------------- | -|_cq_source_name|utf8| -|_cq_sync_time|timestamp[us, tz=UTC]| -|_cq_id (PK)|uuid| -|_cq_parent_id|uuid| |string_col|utf8| diff --git a/docs/testdata/TestGeneratePluginDocs-Markdown-relation_relation_table_b.md b/docs/testdata/TestGeneratePluginDocs-Markdown-relation_relation_table_b.md index 432f6533f8..77dce363dc 100644 --- a/docs/testdata/TestGeneratePluginDocs-Markdown-relation_relation_table_b.md +++ b/docs/testdata/TestGeneratePluginDocs-Markdown-relation_relation_table_b.md @@ -4,7 +4,7 @@ This table shows data for Relation Relation Table B. Description for relational table's relation -The primary key for this table is **_cq_id**. +The composite primary key for this table is (). ## Relations @@ -14,8 +14,4 @@ This table depends on [relation_table](relation_table.md). | Name | Type | | ------------- | ------------- | -|_cq_source_name|utf8| -|_cq_sync_time|timestamp[us, tz=UTC]| -|_cq_id (PK)|uuid| -|_cq_parent_id|uuid| |string_col|utf8| diff --git a/docs/testdata/TestGeneratePluginDocs-Markdown-relation_table.md b/docs/testdata/TestGeneratePluginDocs-Markdown-relation_table.md index 7db8baff7e..96b152a8fe 100644 --- a/docs/testdata/TestGeneratePluginDocs-Markdown-relation_table.md +++ b/docs/testdata/TestGeneratePluginDocs-Markdown-relation_table.md @@ -4,7 +4,7 @@ This table shows data for Relation Table. Description for relational table -The primary key for this table is **_cq_id**. +The composite primary key for this table is (). ## Relations @@ -18,8 +18,4 @@ The following tables depend on relation_table: | Name | Type | | ------------- | ------------- | -|_cq_source_name|utf8| -|_cq_sync_time|timestamp[us, tz=UTC]| -|_cq_id (PK)|uuid| -|_cq_parent_id|uuid| |string_col|utf8| diff --git a/docs/testdata/TestGeneratePluginDocs-Markdown-test_table.md b/docs/testdata/TestGeneratePluginDocs-Markdown-test_table.md index f0c91578a5..089a0b4b3e 100644 --- a/docs/testdata/TestGeneratePluginDocs-Markdown-test_table.md +++ b/docs/testdata/TestGeneratePluginDocs-Markdown-test_table.md @@ -16,10 +16,6 @@ The following tables depend on test_table: | Name | Type | | ------------- | ------------- | -|_cq_source_name|utf8| -|_cq_sync_time|timestamp[us, tz=UTC]| -|_cq_id|uuid| -|_cq_parent_id|uuid| |int_col|int64| |id_col (PK)|int64| |id_col2 (PK)|int64| diff --git a/go.mod b/go.mod index de7db51fe8..985ded470a 100644 --- a/go.mod +++ b/go.mod @@ -25,7 +25,11 @@ require ( google.golang.org/protobuf v1.30.0 ) +<<<<<<< HEAD replace github.com/apache/arrow/go/v13 => github.com/cloudquery/arrow/go/v13 v13.0.0-20230623001532-8366a2241e66 +======= +replace github.com/apache/arrow/go/v13 => github.com/cloudquery/arrow/go/v13 v13.0.0-20230610001216-0f7bd3beda2c +>>>>>>> 7e5547e (more wip) replace github.com/cloudquery/plugin-pb-go => ../plugin-pb-go @@ -36,6 +40,7 @@ require ( github.com/davecgh/go-spew v1.1.1 // indirect github.com/ghodss/yaml v1.0.0 // indirect github.com/golang/protobuf v1.5.3 // indirect + github.com/golang/snappy v0.0.4 // indirect github.com/google/flatbuffers v23.1.21+incompatible // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/klauspost/compress v1.16.0 // indirect @@ -46,7 +51,7 @@ require ( github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 // indirect github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3 // indirect github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect - github.com/pierrec/lz4/v4 v4.1.15 // indirect + github.com/pierrec/lz4/v4 v4.1.17 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect github.com/rivo/uniseg v0.2.0 // indirect github.com/schollz/progressbar/v3 v3.13.1 // indirect diff --git a/go.sum b/go.sum index 0ebdc98a2f..6d7acabc3e 100644 --- a/go.sum +++ b/go.sum @@ -47,8 +47,8 @@ github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWR github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= -github.com/cloudquery/arrow/go/v13 v13.0.0-20230509053643-898a79b1d3c8 h1:CmgLSEGQNLHpUQ5cU4L4aF7cuJZRnc1toIIWqC1gmPg= -github.com/cloudquery/arrow/go/v13 v13.0.0-20230509053643-898a79b1d3c8/go.mod h1:/XatdE3kDIBqZKhZ7OBUHwP2jaASDFZHqF4puOWM8po= +github.com/cloudquery/arrow/go/v13 v13.0.0-20230610001216-0f7bd3beda2c h1:nQSB4v0QxCW5XDLvVBcaNrsJ+J/esMBoFYjymllxM1E= +github.com/cloudquery/arrow/go/v13 v13.0.0-20230610001216-0f7bd3beda2c/go.mod h1:W69eByFNO0ZR30q1/7Sr9d83zcVZmF2MiP3fFYAWJOc= github.com/cloudquery/plugin-sdk/v2 v2.7.0 h1:hRXsdEiaOxJtsn/wZMFQC9/jPfU1MeMK3KF+gPGqm7U= github.com/cloudquery/plugin-sdk/v2 v2.7.0/go.mod h1:pAX6ojIW99b/Vg4CkhnsGkRIzNaVEceYMR+Bdit73ug= github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= @@ -153,8 +153,8 @@ github.com/klauspost/compress v1.16.0 h1:iULayQNOReoYUe+1qtKOqw9CwJv3aNQu8ivo7lw github.com/klauspost/compress v1.16.0/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE= github.com/klauspost/cpuid/v2 v2.2.3 h1:sxCkb+qR91z4vsqw4vGGZlDgPz3G7gjaLyK3V8y70BU= github.com/klauspost/cpuid/v2 v2.2.3/go.mod h1:RVVoqg1df56z8g3pUjL/3lE5UfnlrJX8tyFgg4nqhuY= +github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= -github.com/kr/pretty v0.3.0 h1:WgNl7dwNpEZ6jJ9k1snq4pZsg7DOEN8hP9Xw0Tsjwk0= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= @@ -190,7 +190,6 @@ github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1: github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY= github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= -github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8= github.com/rs/xid v1.2.1/go.mod h1:+uKXf+4Djp6Md1KODXJxgGQPKngRmWyn10oCKFzNHOQ= github.com/rs/xid v1.4.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg= github.com/rs/zerolog v1.19.0/go.mod h1:IzD0RJ65iWH0w97OQQebJEvTZYvsCUm9WVLWBQrJRjo= diff --git a/plugin/messages.go b/plugin/messages.go new file mode 100644 index 0000000000..fa975cc97c --- /dev/null +++ b/plugin/messages.go @@ -0,0 +1,84 @@ +package plugin + +import ( + "github.com/apache/arrow/go/v13/arrow" + "github.com/cloudquery/plugin-sdk/v4/schema" +) + +type MessageType int + +const ( + // Create table + MessageTypeCreate MessageType = iota + // Insert record + MessageTypeInsert + // Insert or update record + MessageTypeUpsert + // Delete rows + MessageTypeDelete +) + +type MessageCreateTable struct { + Table *schema.Table + Force bool +} + +func (*MessageCreateTable) Type() MessageType { + return MessageTypeCreate +} + +type MessageInsert struct { + Record arrow.Record + Columns []string + Upsert bool +} + +func (*MessageInsert) Type() MessageType { + return MessageTypeInsert +} + +type Operator int + +const ( + OperatorEqual Operator = iota + OperatorNotEqual + OperatorGreaterThan + OperatorGreaterThanOrEqual + OperatorLessThan + OperatorLessThanOrEqual +) + +type WhereClause struct { + Column string + Operator Operator + Value string +} + +type MessageDelete struct { + Record arrow.Record + // currently delete only supports and where clause as we don't support + // full AST parsing + WhereClauses []WhereClause +} + +func (*MessageDelete) Type() MessageType { + return MessageTypeDelete +} + +type Message interface { + Type() MessageType +} + +type Messages []Message + +func (m Messages) InsertItems() int64 { + items := int64(0) + for _, msg := range m { + switch msg.Type() { + case MessageTypeInsert: + msgInsert := msg.(*MessageInsert) + items += msgInsert.Record.NumRows() + } + } + return items +} diff --git a/plugin/options.go b/plugin/options.go index ebdf1329f4..09a771d0b6 100644 --- a/plugin/options.go +++ b/plugin/options.go @@ -1,9 +1,5 @@ package plugin -import ( - "github.com/cloudquery/plugin-sdk/v4/schema" -) - type MigrateMode int const ( @@ -36,18 +32,3 @@ func (m WriteMode) String() string { } type Option func(*Plugin) - -// WithNoInternalColumns won't add internal columns (_cq_id, _cq_parent_cq_id) to the plugin tables -func WithNoInternalColumns() Option { - return func(p *Plugin) { - p.internalColumns = false - } -} - -// WithTitleTransformer allows the plugin to control how table names get turned into titles for the -// generated documentation. -func WithTitleTransformer(t func(*schema.Table) string) Option { - return func(p *Plugin) { - p.titleTransformer = t - } -} diff --git a/plugin/plugin.go b/plugin/plugin.go index 05dfcff7ee..9900d16e26 100644 --- a/plugin/plugin.go +++ b/plugin/plugin.go @@ -4,9 +4,7 @@ import ( "context" "fmt" "sync" - "time" - "github.com/apache/arrow/go/v13/arrow" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/rs/zerolog" ) @@ -15,30 +13,20 @@ type NewClientFunc func(context.Context, zerolog.Logger, any) (Client, error) type Client interface { Tables(ctx context.Context) (schema.Tables, error) - Sync(ctx context.Context, options SyncOptions, res chan<- arrow.Record) error - Migrate(ctx context.Context, tables schema.Tables, migrateMode MigrateOptions) error - Write(ctx context.Context, options WriteOptions, res <-chan arrow.Record) error - DeleteStale(ctx context.Context, tables schema.Tables, sourceName string, syncTime time.Time) error + Sync(ctx context.Context, options SyncOptions, res chan<- Message) error + Write(ctx context.Context, options WriteOptions, res <-chan Message) error Close(ctx context.Context) error } type UnimplementedWriter struct{} -func (UnimplementedWriter) Migrate(ctx context.Context, tables schema.Tables, migrateMode MigrateMode) error { - return fmt.Errorf("not implemented") -} - -func (UnimplementedWriter) Write(ctx context.Context, tables schema.Tables, writeMode WriteMode, res <-chan arrow.Record) error { - return fmt.Errorf("not implemented") -} - -func (UnimplementedWriter) DeleteStale(ctx context.Context, tables schema.Tables, sourceName string, syncTime time.Time) error { +func (UnimplementedWriter) Write(ctx context.Context, options WriteOptions, res <-chan Message) error { return fmt.Errorf("not implemented") } type UnimplementedSync struct{} -func (UnimplementedSync) Sync(ctx context.Context, options SyncOptions, res chan<- arrow.Record) error { +func (UnimplementedSync) Sync(ctx context.Context, options SyncOptions, res chan<- Message) error { return fmt.Errorf("not implemented") } @@ -57,8 +45,6 @@ type Plugin struct { newClient NewClientFunc // Logger to call, this logger is passed to the serve.Serve Client, if not defined Serve will create one instead. logger zerolog.Logger - // maxDepth is the max depth of tables - maxDepth uint64 // mu is a mutex that limits the number of concurrent init/syncs (can only be one at a time) mu sync.Mutex // client is the initialized session client @@ -68,8 +54,6 @@ type Plugin struct { // NoInternalColumns if set to true will not add internal columns to tables such as _cq_id and _cq_parent_id // useful for sources such as PostgreSQL and other databases internalColumns bool - // titleTransformer allows the plugin to control how table names get turned into titles for generated documentation - titleTransformer func(*schema.Table) string } const ( @@ -94,11 +78,10 @@ func maxDepth(tables schema.Tables) uint64 { // Depending on the options, it can be write only plugin, read only plugin or both. func NewPlugin(name string, version string, newClient NewClientFunc, options ...Option) *Plugin { p := Plugin{ - name: name, - version: version, - internalColumns: true, - titleTransformer: DefaultTitleTransformer, - newClient: newClient, + name: name, + version: version, + internalColumns: true, + newClient: newClient, } for _, opt := range options { opt(&p) @@ -125,22 +108,6 @@ func (p *Plugin) Tables(ctx context.Context) (schema.Tables, error) { if err != nil { return nil, fmt.Errorf("failed to get tables: %w", err) } - setParents(tables, nil) - if err := transformTables(tables); err != nil { - return nil, err - } - if p.internalColumns { - if err := p.addInternalColumns(tables); err != nil { - return nil, err - } - } - p.maxDepth = maxDepth(tables) - if p.maxDepth > maxAllowedDepth { - return nil, fmt.Errorf("max depth of tables is %d, max allowed is %d", p.maxDepth, maxAllowedDepth) - } - if err := p.validate(tables); err != nil { - return nil, err - } return tables, nil } diff --git a/plugin/plugin_reader.go b/plugin/plugin_reader.go index 57f9f52bea..89963d7eb4 100644 --- a/plugin/plugin_reader.go +++ b/plugin/plugin_reader.go @@ -3,9 +3,8 @@ package plugin import ( "context" "fmt" - "time" - "github.com/apache/arrow/go/v13/arrow" + "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/cloudquery/plugin-sdk/v4/state" "github.com/rs/zerolog" ) @@ -15,15 +14,12 @@ type SyncOptions struct { SkipTables []string Concurrency int64 DeterministicCQID bool - // SyncTime if specified then this will be add to every table as _sync_time column - SyncTime time.Time - // If spceified then this will be added to every table as _source_name column - SourceName string - StateBackend state.Client + StateBackend state.Client } type ReadOnlyClient interface { - Sync(ctx context.Context, options SyncOptions, res chan<- arrow.Record) error + Tables(ctx context.Context) (schema.Tables, error) + Sync(ctx context.Context, options SyncOptions, res chan<- Message) error Close(ctx context.Context) error } @@ -48,15 +44,15 @@ func NewReadOnlyPlugin(name string, version string, newClient NewReadOnlyClientF return NewPlugin(name, version, newClientWrapper, options...) } -func (p *Plugin) syncAll(ctx context.Context, options SyncOptions) ([]arrow.Record, error) { +func (p *Plugin) syncAll(ctx context.Context, options SyncOptions) (Messages, error) { var err error - ch := make(chan arrow.Record) + ch := make(chan Message) go func() { defer close(ch) err = p.Sync(ctx, options, ch) }() // nolint:prealloc - var resources []arrow.Record + var resources []Message for resource := range ch { resources = append(resources, resource) } @@ -64,12 +60,12 @@ func (p *Plugin) syncAll(ctx context.Context, options SyncOptions) ([]arrow.Reco } // Sync is syncing data from the requested tables in spec to the given channel -func (p *Plugin) Sync(ctx context.Context, options SyncOptions, res chan<- arrow.Record) error { +func (p *Plugin) Sync(ctx context.Context, options SyncOptions, res chan<- Message) error { if !p.mu.TryLock() { return fmt.Errorf("plugin already in use") } defer p.mu.Unlock() - p.syncTime = options.SyncTime + // p.syncTime = options.SyncTime // startTime := time.Now() if err := p.client.Sync(ctx, options, res); err != nil { diff --git a/plugin/plugin_writer.go b/plugin/plugin_writer.go index d0420182be..009ac23ad7 100644 --- a/plugin/plugin_writer.go +++ b/plugin/plugin_writer.go @@ -2,76 +2,30 @@ package plugin import ( "context" - "fmt" - "time" - - "github.com/apache/arrow/go/v13/arrow" - "github.com/cloudquery/plugin-sdk/v4/schema" ) type WriteOptions struct { - // WriteMode is the mode to write to the database - WriteMode WriteMode - // Predefined tables are available if tables are known at the start of the write - Tables schema.Tables -} - -type MigrateOptions struct { - // MigrateMode is the mode to migrate the database - MigrateMode MigrateMode -} - -func (p *Plugin) Migrate(ctx context.Context, tables schema.Tables, options MigrateOptions) error { - if p.client == nil { - return fmt.Errorf("plugin is not initialized") - } - return p.client.Migrate(ctx, tables, options) } // this function is currently used mostly for testing so it's not a public api -func (p *Plugin) writeOne(ctx context.Context, options WriteOptions, resource arrow.Record) error { - resources := []arrow.Record{resource} +func (p *Plugin) writeOne(ctx context.Context, options WriteOptions, resource Message) error { + resources := []Message{resource} return p.writeAll(ctx, options, resources) } // this function is currently used mostly for testing so it's not a public api -func (p *Plugin) writeAll(ctx context.Context, options WriteOptions, resources []arrow.Record) error { - ch := make(chan arrow.Record, len(resources)) +func (p *Plugin) writeAll(ctx context.Context, options WriteOptions, resources []Message) error { + ch := make(chan Message, len(resources)) for _, resource := range resources { ch <- resource } close(ch) - tables := make(schema.Tables, 0) - tableNames := make(map[string]struct{}) - for _, resource := range resources { - sc := resource.Schema() - tableMD := sc.Metadata() - name, found := tableMD.GetValue(schema.MetadataTableName) - if !found { - return fmt.Errorf("missing table name") - } - if _, ok := tableNames[name]; ok { - continue - } - table, err := schema.NewTableFromArrowSchema(resource.Schema()) - if err != nil { - return err - } - tables = append(tables, table) - tableNames[table.Name] = struct{}{} - } - options.Tables = tables return p.Write(ctx, options, ch) } -func (p *Plugin) Write(ctx context.Context, options WriteOptions, res <-chan arrow.Record) error { +func (p *Plugin) Write(ctx context.Context, options WriteOptions, res <-chan Message) error { if err := p.client.Write(ctx, options, res); err != nil { return err } return nil } - -func (p *Plugin) DeleteStale(ctx context.Context, tables schema.Tables, sourceName string, syncTime time.Time) error { - syncTime = syncTime.UTC() - return p.client.DeleteStale(ctx, tables, sourceName, syncTime) -} diff --git a/plugin/testing_overwrite_deletestale.go b/plugin/testing_overwrite_deletestale.go deleted file mode 100644 index 6ac079ff13..0000000000 --- a/plugin/testing_overwrite_deletestale.go +++ /dev/null @@ -1,168 +0,0 @@ -package plugin - -import ( - "context" - "fmt" - "time" - - "github.com/apache/arrow/go/v13/arrow/array" - "github.com/cloudquery/plugin-sdk/v4/schema" - "github.com/cloudquery/plugin-sdk/v4/types" - "github.com/google/uuid" - "github.com/rs/zerolog" -) - -func (*PluginTestSuite) destinationPluginTestWriteOverwriteDeleteStale(ctx context.Context, p *Plugin, logger zerolog.Logger, spec any, testOpts PluginTestSuiteRunnerOptions) error { - writeMode := WriteModeOverwriteDeleteStale - if err := p.Init(ctx, spec); err != nil { - return fmt.Errorf("failed to init plugin: %w", err) - } - tableName := fmt.Sprintf("cq_overwrite_delete_stale_%d", time.Now().Unix()) - table := schema.TestTable(tableName, testOpts.TestSourceOptions) - incTable := schema.TestTable(tableName+"_incremental", testOpts.TestSourceOptions) - incTable.IsIncremental = true - syncTime := time.Now().UTC().Round(1 * time.Second) - tables := schema.Tables{ - table, - incTable, - } - if err := p.Migrate(ctx, tables, MigrateModeSafe); err != nil { - return fmt.Errorf("failed to migrate tables: %w", err) - } - - sourceName := "testOverwriteSource" + uuid.NewString() - - opts := schema.GenTestDataOptions{ - SourceName: sourceName, - SyncTime: syncTime, - MaxRows: 2, - TimePrecision: testOpts.TimePrecision, - } - resources := schema.GenTestData(table, opts) - incResources := schema.GenTestData(incTable, opts) - allResources := resources - allResources = append(allResources, incResources...) - if err := p.writeAll(ctx, sourceName, syncTime, writeMode, allResources); err != nil { - return fmt.Errorf("failed to write all: %w", err) - } - sortRecordsBySyncTime(table, resources) - - resourcesRead, err := p.syncAll(ctx, SyncOptions{ - Tables: []string{tableName}, - SyncTime: syncTime, - SourceName: sourceName, - }) - if err != nil { - return fmt.Errorf("failed to read all: %w", err) - } - sortRecordsBySyncTime(table, resourcesRead) - - if len(resourcesRead) != 2 { - return fmt.Errorf("expected 2 resources, got %d", len(resourcesRead)) - } - testOpts.AllowNull.replaceNullsByEmpty(resources) - if testOpts.IgnoreNullsInLists { - stripNullsFromLists(resources) - } - if !recordApproxEqual(resources[0], resourcesRead[0]) { - diff := RecordDiff(resources[0], resourcesRead[0]) - return fmt.Errorf("expected first resource to be equal. diff: %s", diff) - } - - if !recordApproxEqual(resources[1], resourcesRead[1]) { - diff := RecordDiff(resources[1], resourcesRead[1]) - return fmt.Errorf("expected second resource to be equal. diff: %s", diff) - } - - // read from incremental table - resourcesRead, err = p.syncAll(ctx, SyncOptions{ - Tables: []string{tableName}, - SyncTime: syncTime, - SourceName: sourceName, - }) - if err != nil { - return fmt.Errorf("failed to read all: %w", err) - } - if len(resourcesRead) != 2 { - return fmt.Errorf("expected 2 resources in incremental table, got %d", len(resourcesRead)) - } - - secondSyncTime := syncTime.Add(time.Second).UTC() - // copy first resource but update the sync time - cqIDInds := resources[0].Schema().FieldIndices(schema.CqIDColumn.Name) - u := resources[0].Column(cqIDInds[0]).(*types.UUIDArray).Value(0) - opts = schema.GenTestDataOptions{ - SourceName: sourceName, - SyncTime: secondSyncTime, - StableUUID: u, - MaxRows: 1, - TimePrecision: testOpts.TimePrecision, - } - updatedResources := schema.GenTestData(table, opts) - updatedIncResources := schema.GenTestData(incTable, opts) - allUpdatedResources := updatedResources - allUpdatedResources = append(allUpdatedResources, updatedIncResources...) - - if err := p.writeAll(ctx, sourceName, secondSyncTime, writeMode, allUpdatedResources); err != nil { - return fmt.Errorf("failed to write all second time: %w", err) - } - - resourcesRead, err = p.syncAll(ctx, SyncOptions{ - Tables: []string{tableName}, - SyncTime: secondSyncTime, - SourceName: sourceName, - }) - if err != nil { - return fmt.Errorf("failed to read all second time: %w", err) - } - sortRecordsBySyncTime(table, resourcesRead) - if len(resourcesRead) != 1 { - return fmt.Errorf("after overwrite expected 1 resource, got %d", len(resourcesRead)) - } - testOpts.AllowNull.replaceNullsByEmpty(resources) - if testOpts.IgnoreNullsInLists { - stripNullsFromLists(resources) - } - if recordApproxEqual(resources[0], resourcesRead[0]) { - diff := RecordDiff(resources[0], resourcesRead[0]) - return fmt.Errorf("after overwrite expected first resource to be different. diff: %s", diff) - } - - resourcesRead, err = p.syncAll(ctx, SyncOptions{ - Tables: []string{tableName}, - SyncTime: syncTime, - SourceName: sourceName, - }) - if err != nil { - return fmt.Errorf("failed to read all second time: %w", err) - } - if len(resourcesRead) != 1 { - return fmt.Errorf("expected 1 resource after delete stale, got %d", len(resourcesRead)) - } - - // we expect the only resource returned to match the updated resource we wrote - testOpts.AllowNull.replaceNullsByEmpty(updatedResources) - if testOpts.IgnoreNullsInLists { - stripNullsFromLists(updatedResources) - } - if !recordApproxEqual(updatedResources[0], resourcesRead[0]) { - diff := RecordDiff(updatedResources[0], resourcesRead[0]) - return fmt.Errorf("after delete stale expected resource to be equal. diff: %s", diff) - } - - // we expect the incremental table to still have 3 resources, because delete-stale should - // not apply there - resourcesRead, err = p.syncAll(ctx, SyncOptions{ - Tables: []string{incTable.Name}, - SyncTime: secondSyncTime, - SourceName: sourceName, - }) - if err != nil { - return fmt.Errorf("failed to read all from incremental table: %w", err) - } - if len(resourcesRead) != 3 { - return fmt.Errorf("expected 3 resources in incremental table after delete-stale, got %d", len(resourcesRead)) - } - - return nil -} diff --git a/plugin/testing_sync.go b/plugin/testing_sync.go index 01a09c98b6..ecd136ca00 100644 --- a/plugin/testing_sync.go +++ b/plugin/testing_sync.go @@ -13,7 +13,7 @@ import ( type Validator func(t *testing.T, plugin *Plugin, resources []arrow.Record) -func TestPluginSync(t *testing.T, plugin *Plugin, sourceName string, spec any, options SyncOptions, opts ...TestPluginOption) { +func TestPluginSync(t *testing.T, plugin *Plugin, spec any, options SyncOptions, opts ...TestPluginOption) { t.Helper() o := &testPluginOptions{ @@ -101,19 +101,13 @@ func validateTable(t *testing.T, table *schema.Table, resources []arrow.Record) func validatePlugin(t *testing.T, plugin *Plugin, resources []arrow.Record) { t.Helper() - tables := extractTables(plugin.staticTables) - for _, table := range tables { - validateTable(t, table, resources) + tables, err := plugin.Tables(context.Background()) + if err != nil { + t.Fatal(err) } -} - -func extractTables(tables schema.Tables) []*schema.Table { - result := make([]*schema.Table, 0) - for _, table := range tables { - result = append(result, table) - result = append(result, extractTables(table.Relations)...) + for _, table := range tables.FlattenTables() { + validateTable(t, table, resources) } - return result } // Validates that every column has at least one non-nil value. diff --git a/plugin/testing_upsert.go b/plugin/testing_upsert.go new file mode 100644 index 0000000000..f6b16f3ae3 --- /dev/null +++ b/plugin/testing_upsert.go @@ -0,0 +1,69 @@ +package plugin + +import ( + "context" + "fmt" + "time" + + "github.com/apache/arrow/go/v13/arrow" + "github.com/apache/arrow/go/v13/arrow/array" + "github.com/apache/arrow/go/v13/arrow/memory" + "github.com/cloudquery/plugin-sdk/v4/schema" +) + +func (s *PluginTestSuite) testUpsert(ctx context.Context) error { + tableName := fmt.Sprintf("cq_test_upsert_%d", time.Now().Unix()) + table := &schema.Table{ + Name: tableName, + Columns: []schema.Column{ + {Name: "name", Type: arrow.BinaryTypes.String, PrimaryKey: true}, + }, + } + if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageCreateTable{ + Table: table, + }); err != nil { + return fmt.Errorf("failed to create table: %w", err) + } + + bldr := array.NewRecordBuilder(memory.DefaultAllocator, table.ToArrowSchema()) + bldr.Field(0).(*array.StringBuilder).Append("foo") + + if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageInsert{ + Record: bldr.NewRecord(), + Upsert: true, + }); err != nil { + return fmt.Errorf("failed to insert record: %w", err) + } + + messages, err := s.plugin.syncAll(ctx, SyncOptions{ + Tables: []string{tableName}, + }) + if err != nil { + return fmt.Errorf("failed to sync: %w", err) + } + totalItems := messages.InsertItems() + if totalItems != 1 { + return fmt.Errorf("expected 1 item, got %d", totalItems) + } + + if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageInsert{ + Record: bldr.NewRecord(), + Upsert: true, + }); err != nil { + return fmt.Errorf("failed to insert record: %w", err) + } + + messages, err = s.plugin.syncAll(ctx, SyncOptions{ + Tables: []string{tableName}, + }) + if err != nil { + return fmt.Errorf("failed to sync: %w", err) + } + + totalItems = messages.InsertItems() + if totalItems != 1 { + return fmt.Errorf("expected 1 item, got %d", totalItems) + } + + return nil +} diff --git a/plugin/testing_write.go b/plugin/testing_write.go index e7e50ef76f..5a358376af 100644 --- a/plugin/testing_write.go +++ b/plugin/testing_write.go @@ -2,21 +2,33 @@ package plugin import ( "context" - "os" "sort" "strings" "testing" - "time" "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/array" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/cloudquery/plugin-sdk/v4/types" - "github.com/rs/zerolog" ) type PluginTestSuite struct { tests PluginTestSuiteTests + + plugin *Plugin + + // AllowNull is a custom func to determine whether a data type may be correctly represented as null. + // Destinations that have problems representing some data types should provide a custom implementation here. + // If this param is empty, the default is to allow all data types to be nullable. + // When the value returned by this func is `true` the comparison is made with the empty value instead of null. + allowNull AllowNullFunc + + // IgnoreNullsInLists allows stripping null values from lists before comparison. + // Destination setups that don't support nulls in lists should set this to true. + ignoreNullsInLists bool + + // genDataOptions define how to generate test data and which data types to skip + genDatOptions schema.TestSourceOptions } // MigrateStrategy defines which tests we should include @@ -29,244 +41,97 @@ type MigrateStrategy struct { } type PluginTestSuiteTests struct { - // SkipOverwrite skips testing for "overwrite" mode. Use if the destination - // plugin doesn't support this feature. - SkipOverwrite bool - - // SkipDeleteStale skips testing "delete-stale" mode. Use if the destination - // plugin doesn't support this feature. - SkipDeleteStale bool + // SkipUpsert skips testing with MessageInsert and Upsert=true. + // Usually when a destination is not supporting primary keys + SkipUpsert bool - // SkipAppend skips testing for "append" mode. Use if the destination - // plugin doesn't support this feature. - SkipAppend bool + // SkipDelete skips testing MessageDelete events. + SkipDelete bool - // SkipSecondAppend skips the second append step in the test. - // This is useful in cases like cloud storage where you can't append to an - // existing object after the file has been closed. - SkipSecondAppend bool + // SkipAppend skips testing MessageInsert and Upsert=false. + SkipInsert bool - // SkipMigrateAppend skips a test for the migrate function where a column is added, - // data is appended, then the column is removed and more data appended, checking that the migrations handle - // this correctly. - SkipMigrateAppend bool - // SkipMigrateAppendForce skips a test for the migrate function where a column is changed in force mode - SkipMigrateAppendForce bool + // SkipMigrate skips testing migration + SkipMigrate bool - // SkipMigrateOverwrite skips a test for the migrate function where a column is added, - // data is appended, then the column is removed and more data overwritten, checking that the migrations handle - // this correctly. - SkipMigrateOverwrite bool - // SkipMigrateOverwriteForce skips a test for the migrate function where a column is changed in force mode - SkipMigrateOverwriteForce bool - - MigrateStrategyOverwrite MigrateStrategy - MigrateStrategyAppend MigrateStrategy -} - -func getTestLogger(t *testing.T) zerolog.Logger { - t.Helper() - zerolog.TimeFieldFormat = zerolog.TimeFormatUnixMs - return zerolog.New(zerolog.NewTestWriter(t)).Output( - zerolog.ConsoleWriter{Out: os.Stderr, TimeFormat: time.StampMicro}, - ).Level(zerolog.TraceLevel).With().Timestamp().Logger() + // MigrateStrategy defines which tests should work with force migration + // and which should pass with safe migration + MigrateStrategy MigrateStrategy } type NewPluginFunc func() *Plugin -type PluginTestSuiteRunnerOptions struct { - // IgnoreNullsInLists allows stripping null values from lists before comparison. - // Destination setups that don't support nulls in lists should set this to true. - IgnoreNullsInLists bool - - // AllowNull is a custom func to determine whether a data type may be correctly represented as null. - // Destinations that have problems representing some data types should provide a custom implementation here. - // If this param is empty, the default is to allow all data types to be nullable. - // When the value returned by this func is `true` the comparison is made with the empty value instead of null. - AllowNull AllowNullFunc - - schema.TestSourceOptions -} - -func WithTestSourceAllowNull(allowNull func(arrow.DataType) bool) func(o *PluginTestSuiteRunnerOptions) { - return func(o *PluginTestSuiteRunnerOptions) { - o.AllowNull = allowNull - } -} - -func WithTestIgnoreNullsInLists() func(o *PluginTestSuiteRunnerOptions) { - return func(o *PluginTestSuiteRunnerOptions) { - o.IgnoreNullsInLists = true - } -} - -func WithTestSourceTimePrecision(precision time.Duration) func(o *PluginTestSuiteRunnerOptions) { - return func(o *PluginTestSuiteRunnerOptions) { - o.TimePrecision = precision - } -} - -func WithTestSourceSkipLists() func(o *PluginTestSuiteRunnerOptions) { - return func(o *PluginTestSuiteRunnerOptions) { - o.SkipLists = true - } -} - -func WithTestSourceSkipTimestamps() func(o *PluginTestSuiteRunnerOptions) { - return func(o *PluginTestSuiteRunnerOptions) { - o.SkipTimestamps = true - } -} - -func WithTestSourceSkipDates() func(o *PluginTestSuiteRunnerOptions) { - return func(o *PluginTestSuiteRunnerOptions) { - o.SkipDates = true - } -} - -func WithTestSourceSkipMaps() func(o *PluginTestSuiteRunnerOptions) { - return func(o *PluginTestSuiteRunnerOptions) { - o.SkipMaps = true - } -} - -func WithTestSourceSkipStructs() func(o *PluginTestSuiteRunnerOptions) { - return func(o *PluginTestSuiteRunnerOptions) { - o.SkipStructs = true - } -} - -func WithTestSourceSkipIntervals() func(o *PluginTestSuiteRunnerOptions) { - return func(o *PluginTestSuiteRunnerOptions) { - o.SkipIntervals = true - } -} - -func WithTestSourceSkipDurations() func(o *PluginTestSuiteRunnerOptions) { - return func(o *PluginTestSuiteRunnerOptions) { - o.SkipDurations = true +func WithTestSourceAllowNull(allowNull func(arrow.DataType) bool) func(o *PluginTestSuite) { + return func(o *PluginTestSuite) { + o.allowNull = allowNull } } -func WithTestSourceSkipTimes() func(o *PluginTestSuiteRunnerOptions) { - return func(o *PluginTestSuiteRunnerOptions) { - o.SkipTimes = true +func WithTestIgnoreNullsInLists() func(o *PluginTestSuite) { + return func(o *PluginTestSuite) { + o.ignoreNullsInLists = true } } -func WithTestSourceSkipLargeTypes() func(o *PluginTestSuiteRunnerOptions) { - return func(o *PluginTestSuiteRunnerOptions) { - o.SkipLargeTypes = true +func WithTestDataOptions(opts schema.TestSourceOptions) func(o *PluginTestSuite) { + return func(o *PluginTestSuite) { + o.genDatOptions = opts } } -func WithTestSourceSkipDecimals() func(o *PluginTestSuiteRunnerOptions) { - return func(o *PluginTestSuiteRunnerOptions) { - o.SkipDecimals = true - } -} - -func PluginTestSuiteRunner(t *testing.T, newPlugin NewPluginFunc, spec any, tests PluginTestSuiteTests, testOptions ...func(o *PluginTestSuiteRunnerOptions)) { +func PluginTestSuiteRunner(t *testing.T, p *Plugin, tests PluginTestSuiteTests, opts ...func(o *PluginTestSuite)) { t.Helper() suite := &PluginTestSuite{ - tests: tests, + tests: tests, + plugin: p, } - opts := PluginTestSuiteRunnerOptions{ - TestSourceOptions: schema.TestSourceOptions{ - TimePrecision: time.Microsecond, - }, - } - for _, o := range testOptions { - o(&opts) + for _, opt := range opts { + opt(suite) } ctx := context.Background() - logger := getTestLogger(t) - t.Run("TestWriteOverwrite", func(t *testing.T) { + t.Run("TestUpsert", func(t *testing.T) { t.Helper() - if suite.tests.SkipOverwrite { + if suite.tests.SkipUpsert { t.Skip("skipping " + t.Name()) } - p := newPlugin() - if err := suite.destinationPluginTestWriteOverwrite(ctx, p, logger, spec, opts); err != nil { - t.Fatal(err) - } - if err := p.Close(ctx); err != nil { + if err := suite.testUpsert(ctx); err != nil { t.Fatal(err) } }) - t.Run("TestWriteOverwriteDeleteStale", func(t *testing.T) { + t.Run("TestInsert", func(t *testing.T) { t.Helper() - if suite.tests.SkipOverwrite || suite.tests.SkipDeleteStale { + if suite.tests.SkipInsert { t.Skip("skipping " + t.Name()) } - p := newPlugin() - if err := suite.destinationPluginTestWriteOverwriteDeleteStale(ctx, p, logger, spec, opts); err != nil { - t.Fatal(err) - } - if err := p.Close(ctx); err != nil { + if err := suite.testInsert(ctx); err != nil { t.Fatal(err) } }) - t.Run("TestMigrateOverwrite", func(t *testing.T) { + t.Run("TestDelete", func(t *testing.T) { t.Helper() - if suite.tests.SkipMigrateOverwrite { + if suite.tests.SkipDelete { t.Skip("skipping " + t.Name()) } - migrateMode := MigrateModeSafe - writeMode := WriteModeOverwrite - suite.destinationPluginTestMigrate(ctx, t, newPlugin, logger, migrateMode, writeMode, tests.MigrateStrategyOverwrite, opts) - }) - - t.Run("TestMigrateOverwriteForce", func(t *testing.T) { - t.Helper() - if suite.tests.SkipMigrateOverwriteForce { - t.Skip("skipping " + t.Name()) - } - migrateMode := MigrateModeForce - writeMode := WriteModeOverwrite - suite.destinationPluginTestMigrate(ctx, t, newPlugin, logger, migrateMode, writeMode, tests.MigrateStrategyOverwrite, opts) - }) - - t.Run("TestWriteAppend", func(t *testing.T) { - t.Helper() - if suite.tests.SkipAppend { - t.Skip("skipping " + t.Name()) - } - migrateMode := MigrateModeSafe - writeMode := WriteModeOverwrite - p := newPlugin() - if err := suite.destinationPluginTestWriteAppend(ctx, p, logger, migrateMode, writeMode, opts); err != nil { - t.Fatal(err) - } - if err := p.Close(ctx); err != nil { + if err := suite.testDelete(ctx); err != nil { t.Fatal(err) } }) - t.Run("TestMigrateAppend", func(t *testing.T) { + t.Run("TestMigrate", func(t *testing.T) { t.Helper() - if suite.tests.SkipMigrateAppend { + if suite.tests.SkipMigrate { t.Skip("skipping " + t.Name()) } migrateMode := MigrateModeSafe - writeMode := WriteModeAppend - suite.destinationPluginTestMigrate(ctx, t, newPlugin, logger, migrateMode, writeMode, tests.MigrateStrategyAppend, opts) + writeMode := WriteModeOverwrite + suite.destinationPluginTestMigrate(ctx, t, p, migrateMode, writeMode, tests.MigrateStrategyOverwrite, opts) }) - t.Run("TestMigrateAppendForce", func(t *testing.T) { - t.Helper() - if suite.tests.SkipMigrateAppendForce { - t.Skip("skipping " + t.Name()) - } - migrateMode := MigrateModeForce - writeMode := WriteModeAppend - suite.destinationPluginTestMigrate(ctx, t, newPlugin, logger, migrateMode, writeMode, tests.MigrateStrategyAppend, opts) - }) } func sortRecordsBySyncTime(table *schema.Table, records []arrow.Record) { diff --git a/plugin/testing_write_append.go b/plugin/testing_write_append.go deleted file mode 100644 index d4ccdd15d4..0000000000 --- a/plugin/testing_write_append.go +++ /dev/null @@ -1,95 +0,0 @@ -package plugin - -import ( - "context" - "fmt" - "time" - - "github.com/apache/arrow/go/v13/arrow/array" - "github.com/cloudquery/plugin-sdk/v4/schema" - "github.com/google/uuid" - "github.com/rs/zerolog" -) - -func (s *PluginTestSuite) destinationPluginTestWriteAppend(ctx context.Context, p *Plugin, logger zerolog.Logger, migrateMode MigrateMode, writeMode WriteMode, testOpts PluginTestSuiteRunnerOptions) error { - if err := p.Init(ctx, nil); err != nil { - return fmt.Errorf("failed to init plugin: %w", err) - } - tableName := fmt.Sprintf("cq_write_append_%d", time.Now().Unix()) - table := schema.TestTable(tableName, testOpts.TestSourceOptions) - syncTime := time.Now().UTC().Round(1 * time.Second) - tables := schema.Tables{ - table, - } - if err := p.Migrate(ctx, tables, migrateMode); err != nil { - return fmt.Errorf("failed to migrate tables: %w", err) - } - - sourceName := "testAppendSource" + uuid.NewString() - - opts := schema.GenTestDataOptions{ - SourceName: sourceName, - SyncTime: syncTime, - MaxRows: 2, - TimePrecision: testOpts.TimePrecision, - } - record1 := schema.GenTestData(table, opts) - if err := p.writeAll(ctx, sourceName, syncTime, writeMode, record1); err != nil { - return fmt.Errorf("failed to write record first time: %w", err) - } - - secondSyncTime := syncTime.Add(10 * time.Second).UTC() - opts.SyncTime = secondSyncTime - opts.MaxRows = 1 - record2 := schema.GenTestData(table, opts) - - if !s.tests.SkipSecondAppend { - // write second time - if err := p.writeAll(ctx, sourceName, secondSyncTime, writeMode, record2); err != nil { - return fmt.Errorf("failed to write one second time: %w", err) - } - } - - resourcesRead, err := p.syncAll(ctx, SyncOptions{ - Tables: []string{tableName}, - SyncTime: secondSyncTime, - SourceName: sourceName, - }) - if err != nil { - return fmt.Errorf("failed to read all second time: %w", err) - } - sortRecordsBySyncTime(table, resourcesRead) - - expectedResource := 3 - if s.tests.SkipSecondAppend { - expectedResource = 2 - } - - if len(resourcesRead) != expectedResource { - return fmt.Errorf("expected %d resources, got %d", expectedResource, len(resourcesRead)) - } - - testOpts.AllowNull.replaceNullsByEmpty(record1) - testOpts.AllowNull.replaceNullsByEmpty(record2) - if testOpts.IgnoreNullsInLists { - stripNullsFromLists(record1) - stripNullsFromLists(record2) - } - if !recordApproxEqual(record1[0], resourcesRead[0]) { - diff := RecordDiff(record1[0], resourcesRead[0]) - return fmt.Errorf("first expected resource diff at row 0: %s", diff) - } - if !recordApproxEqual(record1[1], resourcesRead[1]) { - diff := RecordDiff(record1[1], resourcesRead[1]) - return fmt.Errorf("first expected resource diff at row 1: %s", diff) - } - - if !s.tests.SkipSecondAppend { - if !recordApproxEqual(record2[0], resourcesRead[2]) { - diff := RecordDiff(record2[0], resourcesRead[2]) - return fmt.Errorf("second expected resource diff: %s", diff) - } - } - - return nil -} diff --git a/plugin/testing_write_delete.go b/plugin/testing_write_delete.go new file mode 100644 index 0000000000..bb4c44c2d8 --- /dev/null +++ b/plugin/testing_write_delete.go @@ -0,0 +1,84 @@ +package plugin + +import ( + "context" + "fmt" + "time" + + "github.com/apache/arrow/go/v13/arrow" + "github.com/apache/arrow/go/v13/arrow/array" + "github.com/apache/arrow/go/v13/arrow/memory" + "github.com/cloudquery/plugin-sdk/v4/schema" + // "github.com/cloudquery/plugin-sdk/v4/types" +) + +func (s *PluginTestSuite) testDelete(ctx context.Context) error { + tableName := fmt.Sprintf("cq_delete_%d", time.Now().Unix()) + syncTime := time.Now().UTC().Round(1 * time.Second) + table := &schema.Table{ + Name: tableName, + Columns: []schema.Column{ + {Name: "name", Type: arrow.BinaryTypes.String}, + {Name: "sync_time", Type: arrow.FixedWidthTypes.Timestamp_us}, + }, + } + if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageCreateTable{ + Table: table, + }); err != nil { + return fmt.Errorf("failed to create table: %w", err) + } + + bldr := array.NewRecordBuilder(memory.DefaultAllocator, table.ToArrowSchema()) + bldr.Field(0).(*array.StringBuilder).Append("test") + bldr.Field(1).(*array.TimestampBuilder).AppendTime(syncTime) + bldr.Field(0).(*array.StringBuilder).Append("test") + bldr.Field(1).(*array.TimestampBuilder).AppendTime(syncTime.Add(time.Second)) + + if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageInsert{ + Record: bldr.NewRecord(), + }); err != nil { + return fmt.Errorf("failed to insert record: %w", err) + } + + messages, err := s.plugin.syncAll(ctx, SyncOptions{ + Tables: []string{tableName}, + }) + if err != nil { + return fmt.Errorf("failed to sync: %w", err) + } + totalItems := messages.InsertItems() + + if totalItems != 2 { + return fmt.Errorf("expected 2 items, got %d", totalItems) + } + + bldr = array.NewRecordBuilder(memory.DefaultAllocator, table.ToArrowSchema()) + bldr.Field(0).(*array.StringBuilder).Append("test") + bldr.Field(1).(*array.TimestampBuilder).AppendTime(syncTime.Add(time.Second)) + + if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageDelete{ + Record: bldr.NewRecord(), + WhereClauses: []WhereClause{ + { + Column: "name", + Operator: OperatorLessThan, + }, + }, + }); err != nil { + return fmt.Errorf("failed to delete stale records: %w", err) + } + + messages, err = s.plugin.syncAll(ctx, SyncOptions{ + Tables: []string{tableName}, + }) + if err != nil { + return fmt.Errorf("failed to sync: %w", err) + } + totalItems = messages.InsertItems() + + if totalItems != 1 { + return fmt.Errorf("expected 1 item, got %d", totalItems) + } + + return nil +} diff --git a/plugin/testing_write_insert.go b/plugin/testing_write_insert.go new file mode 100644 index 0000000000..4bc7f66c86 --- /dev/null +++ b/plugin/testing_write_insert.go @@ -0,0 +1,68 @@ +package plugin + +import ( + "context" + "fmt" + "time" + + "github.com/apache/arrow/go/v13/arrow" + "github.com/apache/arrow/go/v13/arrow/array" + "github.com/apache/arrow/go/v13/arrow/memory" + "github.com/cloudquery/plugin-sdk/v4/schema" +) + +func (s *PluginTestSuite) testInsert(ctx context.Context) error { + tableName := fmt.Sprintf("cq_test_insert_%d", time.Now().Unix()) + table := &schema.Table{ + Name: tableName, + Columns: []schema.Column{ + {Name: "name", Type: arrow.BinaryTypes.String}, + }, + } + if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageCreateTable{ + Table: table, + }); err != nil { + return fmt.Errorf("failed to create table: %w", err) + } + + bldr := array.NewRecordBuilder(memory.DefaultAllocator, table.ToArrowSchema()) + bldr.Field(0).(*array.StringBuilder).Append("foo") + + if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageInsert{ + Record: bldr.NewRecord(), + Upsert: true, + }); err != nil { + return fmt.Errorf("failed to insert record: %w", err) + } + + messages, err := s.plugin.syncAll(ctx, SyncOptions{ + Tables: []string{tableName}, + }) + if err != nil { + return fmt.Errorf("failed to sync: %w", err) + } + totalItems := messages.InsertItems() + if totalItems != 1 { + return fmt.Errorf("expected 1 item, got %d", totalItems) + } + + if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageInsert{ + Record: bldr.NewRecord(), + }); err != nil { + return fmt.Errorf("failed to insert record: %w", err) + } + + messages, err = s.plugin.syncAll(ctx, SyncOptions{ + Tables: []string{tableName}, + }) + if err != nil { + return fmt.Errorf("failed to sync: %w", err) + } + + totalItems = messages.InsertItems() + if totalItems != 2 { + return fmt.Errorf("expected 2 item, got %d", totalItems) + } + + return nil +} diff --git a/plugin/testing_write_migrate.go b/plugin/testing_write_migrate.go index 978c5951a2..78468a817e 100644 --- a/plugin/testing_write_migrate.go +++ b/plugin/testing_write_migrate.go @@ -8,25 +8,20 @@ import ( "time" "github.com/apache/arrow/go/v13/arrow" - "github.com/apache/arrow/go/v13/arrow/array" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/cloudquery/plugin-sdk/v4/types" "github.com/google/uuid" - "github.com/rs/zerolog" - "github.com/stretchr/testify/require" ) func tableUUIDSuffix() string { return strings.ReplaceAll(uuid.NewString(), "-", "_") } -func testMigration(ctx context.Context, _ *testing.T, p *Plugin, logger zerolog.Logger, target *schema.Table, source *schema.Table, mode MigrateMode, writeMode WriteMode, testOpts PluginTestSuiteRunnerOptions) error { - if err := p.Init(ctx, nil); err != nil { - return fmt.Errorf("failed to init plugin: %w", err) - } - - if err := p.Migrate(ctx, schema.Tables{source}, mode); err != nil { - return fmt.Errorf("failed to migrate tables: %w", err) +func (s *PluginTestSuite) migrate(ctx context.Context, target *schema.Table, source *schema.Table, strategy MigrateMode, mode MigrateMode) error { + if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageCreateTable{ + Table: source, + }); err != nil { + return fmt.Errorf("failed to create table: %w", err) } sourceName := target.Name @@ -35,68 +30,69 @@ func testMigration(ctx context.Context, _ *testing.T, p *Plugin, logger zerolog. SourceName: sourceName, SyncTime: syncTime, MaxRows: 1, - TimePrecision: testOpts.TimePrecision, + TimePrecision: s.genDatOptions.TimePrecision, } + resource1 := schema.GenTestData(source, opts)[0] - if err := p.writeOne(ctx, sourceName, syncTime, writeMode, resource1); err != nil { - return fmt.Errorf("failed to write one: %w", err) + + if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageInsert{ + Record: resource1, + }); err != nil { + return fmt.Errorf("failed to insert record: %w", err) } - if err := p.Migrate(ctx, schema.Tables{target}, mode); err != nil { - return fmt.Errorf("failed to migrate existing table: %w", err) + messages, err := s.plugin.syncAll(ctx, SyncOptions{ + Tables: []string{source.Name}, + }) + if err != nil { + return fmt.Errorf("failed to sync: %w", err) } - opts.SyncTime = syncTime.Add(time.Second).UTC() - resource2 := schema.GenTestData(target, opts) - if err := p.writeAll(ctx, sourceName, syncTime, writeMode, resource2); err != nil { - return fmt.Errorf("failed to write one after migration: %w", err) + totalItems := messages.InsertItems() + if totalItems != 1 { + return fmt.Errorf("expected 1 item, got %d", totalItems) } - testOpts.AllowNull.replaceNullsByEmpty(resource2) - if testOpts.IgnoreNullsInLists { - stripNullsFromLists(resource2) + if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageCreateTable{ + Table: target, + Force: strategy == MigrateModeForce, + }); err != nil { + return fmt.Errorf("failed to create table: %w", err) } - resourcesRead, err := p.syncAll(ctx, SyncOptions{ - Tables: []string{target.Name}, - SourceName: sourceName, + if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageInsert{ + Record: resource1, + }); err != nil { + return fmt.Errorf("failed to insert record: %w", err) + } + + messages, err = s.plugin.syncAll(ctx, SyncOptions{ + Tables: []string{source.Name}, }) if err != nil { - return fmt.Errorf("failed to read all: %w", err) + return fmt.Errorf("failed to sync: %w", err) } - sortRecordsBySyncTime(target, resourcesRead) - if mode == MigrateModeSafe { - if len(resourcesRead) != 2 { - return fmt.Errorf("expected 2 resources after write, got %d", len(resourcesRead)) - } - if !recordApproxEqual(resourcesRead[1], resource2[0]) { - diff := RecordDiff(resourcesRead[1], resource2[0]) - return fmt.Errorf("resource1 and resource2 are not equal. diff: %s", diff) + if strategy == MigrateModeSafe || mode == MigrateModeSafe { + totalItems = messages.InsertItems() + if totalItems != 2 { + return fmt.Errorf("expected 2 item, got %d", totalItems) } } else { - if len(resourcesRead) != 1 { - return fmt.Errorf("expected 1 resource after write, got %d", len(resourcesRead)) - } - if !recordApproxEqual(resourcesRead[0], resource2[0]) { - diff := RecordDiff(resourcesRead[0], resource2[0]) - return fmt.Errorf("resource1 and resource2 are not equal. diff: %s", diff) + totalItems = messages.InsertItems() + if totalItems != 1 { + return fmt.Errorf("expected 1 item, got %d", totalItems) } } return nil } -func (*PluginTestSuite) destinationPluginTestMigrate( +func (s *PluginTestSuite) testMigrate( ctx context.Context, t *testing.T, - newPlugin NewPluginFunc, - logger zerolog.Logger, - migrateMode MigrateMode, - writeMode WriteMode, - strategy MigrateStrategy, - testOpts PluginTestSuiteRunnerOptions, + mode MigrateMode, ) { t.Run("add_column", func(t *testing.T) { - if strategy.AddColumn == MigrateModeForce && migrateMode == MigrateModeSafe { + if s.tests.MigrateStrategy.AddColumn == MigrateModeForce && mode == MigrateModeSafe { t.Skip("skipping as migrate mode is safe") return } @@ -104,9 +100,6 @@ func (*PluginTestSuite) destinationPluginTestMigrate( source := &schema.Table{ Name: tableName, Columns: schema.ColumnList{ - schema.CqSourceNameColumn, - schema.CqSyncTimeColumn, - schema.CqIDColumn, {Name: "id", Type: types.ExtensionTypes.UUID}, }, } @@ -114,25 +107,17 @@ func (*PluginTestSuite) destinationPluginTestMigrate( target := &schema.Table{ Name: tableName, Columns: schema.ColumnList{ - schema.CqSourceNameColumn, - schema.CqSyncTimeColumn, - schema.CqIDColumn, {Name: "id", Type: types.ExtensionTypes.UUID}, {Name: "bool", Type: arrow.FixedWidthTypes.Boolean}, }, } - - p := newPlugin() - if err := testMigration(ctx, t, p, logger, target, source, strategy.AddColumn, writeMode, testOpts); err != nil { + if err := s.migrate(ctx, target, source, s.tests.MigrateStrategy.AddColumn, mode); err != nil { t.Fatalf("failed to migrate %s: %v", tableName, err) } - if err := p.Close(ctx); err != nil { - t.Fatal(err) - } }) t.Run("add_column_not_null", func(t *testing.T) { - if strategy.AddColumnNotNull == MigrateModeForce && migrateMode == MigrateModeSafe { + if s.tests.MigrateStrategy.AddColumnNotNull == MigrateModeForce && mode == MigrateModeSafe { t.Skip("skipping as migrate mode is safe") return } @@ -140,9 +125,6 @@ func (*PluginTestSuite) destinationPluginTestMigrate( source := &schema.Table{ Name: tableName, Columns: schema.ColumnList{ - schema.CqSourceNameColumn, - schema.CqSyncTimeColumn, - schema.CqIDColumn, {Name: "id", Type: types.ExtensionTypes.UUID}, }, } @@ -150,23 +132,16 @@ func (*PluginTestSuite) destinationPluginTestMigrate( target := &schema.Table{ Name: tableName, Columns: schema.ColumnList{ - schema.CqSourceNameColumn, - schema.CqSyncTimeColumn, - schema.CqIDColumn, {Name: "id", Type: types.ExtensionTypes.UUID}, {Name: "bool", Type: arrow.FixedWidthTypes.Boolean, NotNull: true}, }} - p := newPlugin() - if err := testMigration(ctx, t, p, logger, target, source, strategy.AddColumnNotNull, writeMode, testOpts); err != nil { + if err := s.migrate(ctx, target, source, s.tests.MigrateStrategy.AddColumnNotNull, mode); err != nil { t.Fatalf("failed to migrate add_column_not_null: %v", err) } - if err := p.Close(ctx); err != nil { - t.Fatal(err) - } }) t.Run("remove_column", func(t *testing.T) { - if strategy.RemoveColumn == MigrateModeForce && migrateMode == MigrateModeSafe { + if s.tests.MigrateStrategy.RemoveColumn == MigrateModeForce && mode == MigrateModeSafe { t.Skip("skipping as migrate mode is safe") return } @@ -174,32 +149,21 @@ func (*PluginTestSuite) destinationPluginTestMigrate( source := &schema.Table{ Name: tableName, Columns: schema.ColumnList{ - schema.CqSourceNameColumn, - schema.CqSyncTimeColumn, - schema.CqIDColumn, {Name: "id", Type: types.ExtensionTypes.UUID}, {Name: "bool", Type: arrow.FixedWidthTypes.Boolean}, }} target := &schema.Table{ Name: tableName, Columns: schema.ColumnList{ - schema.CqSourceNameColumn, - schema.CqSyncTimeColumn, - schema.CqIDColumn, {Name: "id", Type: types.ExtensionTypes.UUID}, }} - - p := newPlugin() - if err := testMigration(ctx, t, p, logger, target, source, strategy.RemoveColumn, writeMode, testOpts); err != nil { + if err := s.migrate(ctx, target, source, s.tests.MigrateStrategy.RemoveColumn, mode); err != nil { t.Fatalf("failed to migrate remove_column: %v", err) } - if err := p.Close(ctx); err != nil { - t.Fatal(err) - } }) t.Run("remove_column_not_null", func(t *testing.T) { - if strategy.RemoveColumnNotNull == MigrateModeForce && migrateMode == MigrateModeSafe { + if s.tests.MigrateStrategy.RemoveColumnNotNull == MigrateModeForce && mode == MigrateModeSafe { t.Skip("skipping as migrate mode is safe") return } @@ -207,9 +171,6 @@ func (*PluginTestSuite) destinationPluginTestMigrate( source := &schema.Table{ Name: tableName, Columns: schema.ColumnList{ - schema.CqSourceNameColumn, - schema.CqSyncTimeColumn, - schema.CqIDColumn, {Name: "id", Type: types.ExtensionTypes.UUID}, {Name: "bool", Type: arrow.FixedWidthTypes.Boolean, NotNull: true}, }, @@ -217,23 +178,15 @@ func (*PluginTestSuite) destinationPluginTestMigrate( target := &schema.Table{ Name: tableName, Columns: schema.ColumnList{ - schema.CqSourceNameColumn, - schema.CqSyncTimeColumn, - schema.CqIDColumn, {Name: "id", Type: types.ExtensionTypes.UUID}, }} - - p := newPlugin() - if err := testMigration(ctx, t, p, logger, target, source, strategy.RemoveColumnNotNull, writeMode, testOpts); err != nil { + if err := s.migrate(ctx, target, source, s.tests.MigrateStrategy.RemoveColumnNotNull, mode); err != nil { t.Fatalf("failed to migrate remove_column_not_null: %v", err) } - if err := p.Close(ctx); err != nil { - t.Fatal(err) - } }) t.Run("change_column", func(t *testing.T) { - if strategy.ChangeColumn == MigrateModeForce && migrateMode == MigrateModeSafe { + if s.tests.MigrateStrategy.ChangeColumn == MigrateModeForce && mode == MigrateModeSafe { t.Skip("skipping as migrate mode is safe") return } @@ -241,40 +194,24 @@ func (*PluginTestSuite) destinationPluginTestMigrate( source := &schema.Table{ Name: tableName, Columns: schema.ColumnList{ - schema.CqSourceNameColumn, - schema.CqSyncTimeColumn, - schema.CqIDColumn, {Name: "id", Type: types.ExtensionTypes.UUID}, {Name: "bool", Type: arrow.FixedWidthTypes.Boolean, NotNull: true}, }} target := &schema.Table{ Name: tableName, Columns: schema.ColumnList{ - schema.CqSourceNameColumn, - schema.CqSyncTimeColumn, - schema.CqIDColumn, {Name: "id", Type: types.ExtensionTypes.UUID}, {Name: "bool", Type: arrow.BinaryTypes.String, NotNull: true}, }} - - p := newPlugin() - if err := testMigration(ctx, t, p, logger, target, source, strategy.ChangeColumn, writeMode, testOpts); err != nil { + if err := s.migrate(ctx, target, source, s.tests.MigrateStrategy.ChangeColumn, mode); err != nil { t.Fatalf("failed to migrate change_column: %v", err) } - if err := p.Close(ctx); err != nil { - t.Fatal(err) - } }) t.Run("double_migration", func(t *testing.T) { - tableName := "double_migration_" + tableUUIDSuffix() - table := schema.TestTable(tableName, testOpts.TestSourceOptions) - - p := newPlugin() - require.NoError(t, p.Init(ctx, nil)) - require.NoError(t, p.Migrate(ctx, schema.Tables{table}, MigrateModeSafe)) - - require.NoError(t, p.Init(ctx, MigrateModeSafe)) - require.NoError(t, p.Migrate(ctx, schema.Tables{table}, MigrateModeSafe)) + // tableName := "double_migration_" + tableUUIDSuffix() + // table := schema.TestTable(tableName, testOpts.TestSourceOptions) + // require.NoError(t, p.Migrate(ctx, schema.Tables{table}, MigrateOptions{MigrateMode: MigrateModeForce})) + // require.NoError(t, p.Migrate(ctx, schema.Tables{table}, MigrateOptions{MigrateMode: MigrateModeForce})) }) } diff --git a/plugin/testing_write_overwrite.go b/plugin/testing_write_overwrite.go deleted file mode 100644 index fd851a6e2e..0000000000 --- a/plugin/testing_write_overwrite.go +++ /dev/null @@ -1,115 +0,0 @@ -package plugin - -import ( - "context" - "fmt" - "time" - - "github.com/apache/arrow/go/v13/arrow/array" - "github.com/cloudquery/plugin-sdk/v4/schema" - "github.com/cloudquery/plugin-sdk/v4/types" - "github.com/google/uuid" - "github.com/rs/zerolog" -) - -func (*PluginTestSuite) destinationPluginTestWriteOverwrite(ctx context.Context, p *Plugin, logger zerolog.Logger, spec any, testOpts PluginTestSuiteRunnerOptions) error { - if err := p.Init(ctx, spec); err != nil { - return fmt.Errorf("failed to init plugin: %w", err) - } - tableName := fmt.Sprintf("cq_test_write_overwrite_%d", time.Now().Unix()) - table := schema.TestTable(tableName, testOpts.TestSourceOptions) - syncTime := time.Now().UTC().Round(1 * time.Second) - tables := schema.Tables{ - table, - } - if err := p.Migrate(ctx, tables, MigrateModeSafe); err != nil { - return fmt.Errorf("failed to migrate tables: %w", err) - } - - sourceName := "testOverwriteSource" + uuid.NewString() - - opts := schema.GenTestDataOptions{ - SourceName: sourceName, - SyncTime: syncTime, - MaxRows: 2, - TimePrecision: testOpts.TimePrecision, - } - resources := schema.GenTestData(table, opts) - if err := p.writeAll(ctx, sourceName, syncTime, WriteModeOverwrite, resources); err != nil { - return fmt.Errorf("failed to write all: %w", err) - } - sortRecordsBySyncTime(table, resources) - testOpts.AllowNull.replaceNullsByEmpty(resources) - if testOpts.IgnoreNullsInLists { - stripNullsFromLists(resources) - } - resourcesRead, err := p.syncAll(ctx, SyncOptions{ - Tables: []string{tableName}, - SyncTime: syncTime, - SourceName: sourceName, - }) - if err != nil { - return fmt.Errorf("failed to read all: %w", err) - } - sortRecordsBySyncTime(table, resourcesRead) - - if len(resourcesRead) != 2 { - return fmt.Errorf("expected 2 resources, got %d", len(resourcesRead)) - } - - if !recordApproxEqual(resources[0], resourcesRead[0]) { - diff := RecordDiff(resources[0], resourcesRead[0]) - return fmt.Errorf("expected first resource to be equal. diff=%s", diff) - } - - if !recordApproxEqual(resources[1], resourcesRead[1]) { - diff := RecordDiff(resources[1], resourcesRead[1]) - return fmt.Errorf("expected second resource to be equal. diff=%s", diff) - } - - secondSyncTime := syncTime.Add(time.Second).UTC() - - // copy first resource but update the sync time - cqIDInds := resources[0].Schema().FieldIndices(schema.CqIDColumn.Name) - u := resources[0].Column(cqIDInds[0]).(*types.UUIDArray).Value(0) - opts = schema.GenTestDataOptions{ - SourceName: sourceName, - SyncTime: secondSyncTime, - MaxRows: 1, - StableUUID: u, - TimePrecision: testOpts.TimePrecision, - } - updatedResource := schema.GenTestData(table, opts) - // write second time - if err := p.writeAll(ctx, sourceName, secondSyncTime, WriteModeOverwrite, updatedResource); err != nil { - return fmt.Errorf("failed to write one second time: %w", err) - } - - testOpts.AllowNull.replaceNullsByEmpty(updatedResource) - if testOpts.IgnoreNullsInLists { - stripNullsFromLists(updatedResource) - } - resourcesRead, err = p.syncAll(ctx, SyncOptions{ - Tables: []string{tableName}, - SyncTime: secondSyncTime, - SourceName: sourceName, - }) - if err != nil { - return fmt.Errorf("failed to read all second time: %w", err) - } - sortRecordsBySyncTime(table, resourcesRead) - if len(resourcesRead) != 2 { - return fmt.Errorf("after overwrite expected 2 resources, got %d", len(resourcesRead)) - } - - if !recordApproxEqual(resources[1], resourcesRead[0]) { - diff := RecordDiff(resources[1], resourcesRead[0]) - return fmt.Errorf("after overwrite expected first resource to be equal. diff=%s", diff) - } - if !recordApproxEqual(updatedResource[0], resourcesRead[1]) { - diff := RecordDiff(updatedResource[0], resourcesRead[1]) - return fmt.Errorf("after overwrite expected second resource to be equal. diff=%s", diff) - } - - return nil -} diff --git a/plugin/testing_write_upsert.go b/plugin/testing_write_upsert.go new file mode 100644 index 0000000000..4ee1ba9db7 --- /dev/null +++ b/plugin/testing_write_upsert.go @@ -0,0 +1,69 @@ +package plugin + +import ( + "context" + "fmt" + "time" + + "github.com/apache/arrow/go/v13/arrow" + "github.com/apache/arrow/go/v13/arrow/array" + "github.com/apache/arrow/go/v13/arrow/memory" + "github.com/cloudquery/plugin-sdk/v4/schema" +) + +func (s *PluginTestSuite) destinationPluginTestWriteOverwrite(ctx context.Context, p *Plugin) error { + tableName := fmt.Sprintf("cq_test_upsert_%d", time.Now().Unix()) + table := &schema.Table{ + Name: tableName, + Columns: []schema.Column{ + {Name: "name", Type: arrow.BinaryTypes.String, PrimaryKey: true}, + }, + } + if err := p.writeOne(ctx, WriteOptions{}, &MessageCreateTable{ + Table: table, + }); err != nil { + return fmt.Errorf("failed to create table: %w", err) + } + + bldr := array.NewRecordBuilder(memory.DefaultAllocator, table.ToArrowSchema()) + bldr.Field(0).(*array.StringBuilder).Append("foo") + + if err := p.writeOne(ctx, WriteOptions{}, &MessageInsert{ + Record: bldr.NewRecord(), + Upsert: true, + }); err != nil { + return fmt.Errorf("failed to insert record: %w", err) + } + + messages, err := p.syncAll(ctx, SyncOptions{ + Tables: []string{tableName}, + }) + if err != nil { + return fmt.Errorf("failed to sync: %w", err) + } + totalItems := messages.InsertItems() + if totalItems != 1 { + return fmt.Errorf("expected 1 item, got %d", totalItems) + } + + if err := p.writeOne(ctx, WriteOptions{}, &MessageInsert{ + Record: bldr.NewRecord(), + Upsert: true, + }); err != nil { + return fmt.Errorf("failed to insert record: %w", err) + } + + messages, err = p.syncAll(ctx, SyncOptions{ + Tables: []string{tableName}, + }) + if err != nil { + return fmt.Errorf("failed to sync: %w", err) + } + + totalItems = messages.InsertItems() + if totalItems != 1 { + return fmt.Errorf("expected 1 item, got %d", totalItems) + } + + return nil +} diff --git a/scheduler/benchmark_test.go b/scheduler/benchmark_test.go new file mode 100644 index 0000000000..6990da0fd7 --- /dev/null +++ b/scheduler/benchmark_test.go @@ -0,0 +1 @@ +package scheduler diff --git a/scheduler/metrics.go b/scheduler/metrics.go index 372965ba93..f5b6c73ef6 100644 --- a/scheduler/metrics.go +++ b/scheduler/metrics.go @@ -7,6 +7,7 @@ import ( "github.com/cloudquery/plugin-sdk/v4/schema" ) +// Metrics is deprecated as we move toward open telemetry for tracing and metrics type Metrics struct { TableClient map[string]map[string]*TableClientMetrics } diff --git a/scheduler/plugin_managed_source_test.go.backup b/scheduler/plugin_managed_source_test.go.backup deleted file mode 100644 index e0a006a4ca..0000000000 --- a/scheduler/plugin_managed_source_test.go.backup +++ /dev/null @@ -1,484 +0,0 @@ -package scheduler - -import ( - "context" - "fmt" - "testing" - "time" - - "github.com/apache/arrow/go/v13/arrow" - "github.com/apache/arrow/go/v13/arrow/array" - pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" - "github.com/cloudquery/plugin-sdk/v4/scalar" - "github.com/cloudquery/plugin-sdk/v4/schema" - "github.com/google/go-cmp/cmp" - "github.com/google/uuid" - "github.com/rs/zerolog" -) - -type testExecutionClient struct { - UnimplementedWriter -} - -var _ schema.ClientMeta = &testExecutionClient{} - -var deterministicStableUUID = uuid.MustParse("c25355aab52c5b70a4e0c9991f5a3b87") -var randomStableUUID = uuid.MustParse("00000000000040008000000000000000") - -var testSyncTime = time.Now() - -func testResolverSuccess(_ context.Context, _ schema.ClientMeta, _ *schema.Resource, res chan<- any) error { - res <- map[string]any{ - "TestColumn": 3, - } - return nil -} - -func testResolverPanic(context.Context, schema.ClientMeta, *schema.Resource, chan<- any) error { - panic("Resolver") -} - -func testPreResourceResolverPanic(context.Context, schema.ClientMeta, *schema.Resource) error { - panic("PreResourceResolver") -} - -func testColumnResolverPanic(context.Context, schema.ClientMeta, *schema.Resource, schema.Column) error { - panic("ColumnResolver") -} - -func testTableSuccess() *schema.Table { - return &schema.Table{ - Name: "test_table_success", - Resolver: testResolverSuccess, - Columns: []schema.Column{ - { - Name: "test_column", - Type: arrow.PrimitiveTypes.Int64, - }, - }, - } -} - -func testTableSuccessWithPK() *schema.Table { - return &schema.Table{ - Name: "test_table_success", - Resolver: testResolverSuccess, - Columns: []schema.Column{ - { - Name: "test_column", - Type: arrow.PrimitiveTypes.Int64, - PrimaryKey: true, - }, - }, - } -} - -func testTableResolverPanic() *schema.Table { - return &schema.Table{ - Name: "test_table_resolver_panic", - Resolver: testResolverPanic, - Columns: []schema.Column{ - { - Name: "test_column", - Type: arrow.PrimitiveTypes.Int64, - }, - }, - } -} - -func testTablePreResourceResolverPanic() *schema.Table { - return &schema.Table{ - Name: "test_table_pre_resource_resolver_panic", - PreResourceResolver: testPreResourceResolverPanic, - Resolver: testResolverSuccess, - Columns: []schema.Column{ - { - Name: "test_column", - Type: arrow.PrimitiveTypes.Int64, - }, - }, - } -} - -func testTableColumnResolverPanic() *schema.Table { - return &schema.Table{ - Name: "test_table_column_resolver_panic", - Resolver: testResolverSuccess, - Columns: []schema.Column{ - { - Name: "test_column", - Type: arrow.PrimitiveTypes.Int64, - }, - { - Name: "test_column1", - Type: arrow.PrimitiveTypes.Int64, - Resolver: testColumnResolverPanic, - }, - }, - } -} - -func testTableRelationSuccess() *schema.Table { - return &schema.Table{ - Name: "test_table_relation_success", - Resolver: testResolverSuccess, - Columns: []schema.Column{ - { - Name: "test_column", - Type: arrow.PrimitiveTypes.Int64, - }, - }, - Relations: []*schema.Table{ - testTableSuccess(), - }, - } -} - -func (*testExecutionClient) ID() string { - return "testExecutionClient" -} - -func (*testExecutionClient) Close(context.Context) error { - return nil -} - -func (*testExecutionClient) Read(ctx context.Context, table *schema.Table, sourceName string, res chan<- arrow.Record) error { - return fmt.Errorf("not implemented") -} - -func (*testExecutionClient) Sync(ctx context.Context, res chan<- arrow.Record) error { - return fmt.Errorf("not implemented") -} - -func newTestExecutionClient(context.Context, zerolog.Logger, pbPlugin.Spec) (Client, error) { - return &testExecutionClient{}, nil -} - -type syncTestCase struct { - table *schema.Table - stats Metrics - data []scalar.Vector - deterministicCQID bool -} - -var syncTestCases = []syncTestCase{ - { - table: testTableSuccess(), - stats: Metrics{ - TableClient: map[string]map[string]*TableClientMetrics{ - "test_table_success": { - "testExecutionClient": { - Resources: 1, - }, - }, - }, - }, - data: []scalar.Vector{ - { - &scalar.String{Value: "testSource", Valid: true}, - &scalar.Timestamp{Value: testSyncTime, Valid: true}, - &scalar.UUID{Value: randomStableUUID, Valid: true}, - &scalar.UUID{}, - &scalar.Int{Value: 3, Valid: true}, - }, - }, - }, - { - table: testTableResolverPanic(), - stats: Metrics{ - TableClient: map[string]map[string]*TableClientMetrics{ - "test_table_resolver_panic": { - "testExecutionClient": { - Panics: 1, - }, - }, - }, - }, - data: nil, - }, - { - table: testTablePreResourceResolverPanic(), - stats: Metrics{ - TableClient: map[string]map[string]*TableClientMetrics{ - "test_table_pre_resource_resolver_panic": { - "testExecutionClient": { - Panics: 1, - }, - }, - }, - }, - data: nil, - }, - - { - table: testTableRelationSuccess(), - stats: Metrics{ - TableClient: map[string]map[string]*TableClientMetrics{ - "test_table_relation_success": { - "testExecutionClient": { - Resources: 1, - }, - }, - "test_table_success": { - "testExecutionClient": { - Resources: 1, - }, - }, - }, - }, - data: []scalar.Vector{ - { - &scalar.String{Value: "testSource", Valid: true}, - &scalar.Timestamp{Value: testSyncTime, Valid: true}, - &scalar.UUID{Value: randomStableUUID, Valid: true}, - &scalar.UUID{}, - &scalar.Int{Value: 3, Valid: true}, - }, - { - &scalar.String{Value: "testSource", Valid: true}, - &scalar.Timestamp{Value: testSyncTime, Valid: true}, - &scalar.UUID{Value: randomStableUUID, Valid: true}, - &scalar.UUID{Value: randomStableUUID, Valid: true}, - &scalar.Int{Value: 3, Valid: true}, - }, - }, - }, - { - table: testTableSuccess(), - stats: Metrics{ - TableClient: map[string]map[string]*TableClientMetrics{ - "test_table_success": { - "testExecutionClient": { - Resources: 1, - }, - }, - }, - }, - data: []scalar.Vector{ - { - &scalar.String{Value: "testSource", Valid: true}, - &scalar.Timestamp{Value: testSyncTime, Valid: true}, - &scalar.UUID{Value: randomStableUUID, Valid: true}, - &scalar.UUID{}, - &scalar.Int{Value: 3, Valid: true}, - }, - }, - deterministicCQID: true, - }, - { - table: testTableColumnResolverPanic(), - stats: Metrics{ - TableClient: map[string]map[string]*TableClientMetrics{ - "test_table_column_resolver_panic": { - "testExecutionClient": { - Panics: 1, - Resources: 1, - }, - }, - }, - }, - data: []scalar.Vector{ - { - &scalar.String{Value: "testSource", Valid: true}, - &scalar.Timestamp{Value: testSyncTime, Valid: true}, - &scalar.UUID{Value: randomStableUUID, Valid: true}, - &scalar.UUID{}, - &scalar.Int{Value: 3, Valid: true}, - &scalar.Int{}, - }, - }, - deterministicCQID: true, - }, - { - table: testTableRelationSuccess(), - stats: Metrics{ - TableClient: map[string]map[string]*TableClientMetrics{ - "test_table_relation_success": { - "testExecutionClient": { - Resources: 1, - }, - }, - "test_table_success": { - "testExecutionClient": { - Resources: 1, - }, - }, - }, - }, - data: []scalar.Vector{ - { - &scalar.String{Value: "testSource", Valid: true}, - &scalar.Timestamp{Value: testSyncTime, Valid: true}, - &scalar.UUID{Value: randomStableUUID, Valid: true}, - &scalar.UUID{}, - &scalar.Int{Value: 3, Valid: true}, - }, - { - &scalar.String{Value: "testSource", Valid: true}, - &scalar.Timestamp{Value: testSyncTime, Valid: true}, - &scalar.UUID{Value: randomStableUUID, Valid: true}, - &scalar.UUID{Value: randomStableUUID, Valid: true}, - &scalar.Int{Value: 3, Valid: true}, - }, - }, - deterministicCQID: true, - }, - { - table: testTableSuccessWithPK(), - stats: Metrics{ - TableClient: map[string]map[string]*TableClientMetrics{ - "test_table_success": { - "testExecutionClient": { - Resources: 1, - }, - }, - }, - }, - data: []scalar.Vector{ - { - &scalar.String{Value: "testSource", Valid: true}, - &scalar.Timestamp{Value: testSyncTime, Valid: true}, - &scalar.UUID{Value: deterministicStableUUID, Valid: true}, - &scalar.UUID{}, - &scalar.Int{Value: 3, Valid: true}, - }, - }, - deterministicCQID: true, - }, -} - -type testRand struct{} - -func (testRand) Read(p []byte) (n int, err error) { - for i := range p { - p[i] = byte(0) - } - return len(p), nil -} - -func TestManagedSync(t *testing.T) { - uuid.SetRand(testRand{}) - for _, scheduler := range AllSchedulers { - for _, tc := range syncTestCases { - tc := tc - tc.table = tc.table.Copy(nil) - t.Run(tc.table.Name+"_"+scheduler.String(), func(t *testing.T) { - testSyncTable(t, tc, scheduler, tc.deterministicCQID) - }) - } - } -} - -func testSyncTable(t *testing.T, tc syncTestCase, scheduler Scheduler, deterministicCQID bool) { - ctx := context.Background() - tables := []*schema.Table{ - tc.table, - } - - plugin := NewPlugin( - "testSourcePlugin", - "1.0.0", - newTestExecutionClient, - WithStaticTables(tables), - ) - plugin.SetLogger(zerolog.New(zerolog.NewTestWriter(t))) - sourceName := "testSource" - - if err := plugin.Init(ctx, nil); err != nil { - t.Fatal(err) - } - - records, err := plugin.syncAll(ctx, sourceName, testSyncTime, SyncOptions{ - Tables: []string{"*"}, - Concurrency: 1, - Scheduler: scheduler, - DeterministicCQID: deterministicCQID, - }) - if err != nil { - t.Fatal(err) - } - - var i int - for _, record := range records { - if tc.data == nil { - t.Fatalf("Unexpected resource %v", record) - } - if i >= len(tc.data) { - t.Fatalf("expected %d resources. got %d", len(tc.data), i) - } - rec := tc.data[i].ToArrowRecord(record.Schema()) - if !array.RecordEqual(rec, record) { - t.Fatal(RecordDiff(rec, record)) - // t.Fatalf("expected at i=%d: %v. got %v", i, tc.data[i], record) - } - i++ - } - if len(tc.data) != i { - t.Fatalf("expected %d resources. got %d", len(tc.data), i) - } - - stats := plugin.Metrics() - if !tc.stats.Equal(stats) { - t.Fatalf("unexpected stats: %v", cmp.Diff(tc.stats, stats)) - } -} - -// func TestIgnoredColumns(t *testing.T) { -// table := &schema.Table{ -// Columns: schema.ColumnList{ -// { -// Name: "a", -// Type: arrow.BinaryTypes.String, -// IgnoreInTests: true, -// }, -// }, -// } -// validateResources(t, table, schema.Resources{{ -// Item: struct{ A *string }{}, -// Table: &schema.Table{ -// Columns: schema.ColumnList{ -// { -// Name: "a", -// Type: arrow.BinaryTypes.String, -// IgnoreInTests: true, -// }, -// }, -// }, -// }}) -// } - -var testTable struct { - PrimaryKey string - SecondaryKey string - TertiaryKey string - Quaternary string -} - -// func TestNewPluginPrimaryKeys(t *testing.T) { -// testTransforms := []struct { -// transformerOptions []transformers.StructTransformerOption -// resultKeys []string -// }{ -// { -// transformerOptions: []transformers.StructTransformerOption{transformers.WithPrimaryKeys("PrimaryKey")}, -// resultKeys: []string{"primary_key"}, -// }, -// { -// transformerOptions: []transformers.StructTransformerOption{}, -// resultKeys: []string{"_cq_id"}, -// }, -// } -// for _, tc := range testTransforms { -// tables := []*schema.Table{ -// { -// Name: "test_table", -// Transform: transformers.TransformWithStruct( -// &testTable, tc.transformerOptions..., -// ), -// }, -// } - -// plugin := NewPlugin("testSourcePlugin", "1.0.0", tables, newTestExecutionClient) -// assert.Equal(t, tc.resultKeys, plugin.tables[0].PrimaryKeys()) -// } -// } diff --git a/scheduler/scheduler.go b/scheduler/scheduler.go index d9c2654634..08d8c86166 100644 --- a/scheduler/scheduler.go +++ b/scheduler/scheduler.go @@ -57,8 +57,6 @@ func (s SchedulerStrategy) String() string { return AllSchedulerNames[s] } -const periodicMetricLoggerInterval = 30 * time.Second - type Option func(*Scheduler) func WithLogger(logger zerolog.Logger) Option { @@ -79,6 +77,12 @@ func WithConcurrency(concurrency uint64) Option { } } +func WithSchedulerStrategy(strategy SchedulerStrategy) Option { + return func(s *Scheduler) { + s.strategy = strategy + } +} + type Scheduler struct { tables schema.Tables client schema.ClientMeta @@ -104,10 +108,14 @@ func NewScheduler(tables schema.Tables, client schema.ClientMeta, opts ...Option metrics: &Metrics{TableClient: make(map[string]map[string]*TableClientMetrics)}, caser: caser.New(), concurrency: defaultConcurrency, + maxDepth: maxDepth(tables), } for _, opt := range opts { opt(&s) } + if s.maxDepth > 3 { + panic(fmt.Errorf("max depth of %d is not supported for scheduler", s.maxDepth)) + } return &s } @@ -245,6 +253,20 @@ func (p *Scheduler) resolveColumn(ctx context.Context, logger zerolog.Logger, ta } } +func maxDepth(tables schema.Tables) uint64 { + var depth uint64 + if len(tables) == 0 { + return 0 + } + for _, table := range tables { + newDepth := 1 + maxDepth(table.Relations) + if newDepth > depth { + depth = newDepth + } + } + return depth +} + // unparam's suggestion to remove the second parameter is not good advice here. // nolint:unparam func max(a, b uint64) uint64 { diff --git a/scheduler/scheduler_round_robin_test.go b/scheduler/scheduler_round_robin_test.go index 3b746b81bf..5e60765063 100644 --- a/scheduler/scheduler_round_robin_test.go +++ b/scheduler/scheduler_round_robin_test.go @@ -6,13 +6,6 @@ import ( "github.com/cloudquery/plugin-sdk/v4/schema" ) -type testExecutionClient struct { -} - -func (t *testExecutionClient) ID() string { - return "test" -} - func TestRoundRobinInterleave(t *testing.T) { table1 := &schema.Table{Name: "test_table"} table2 := &schema.Table{Name: "test_table2"} diff --git a/scheduler/scheduler_test.go b/scheduler/scheduler_test.go new file mode 100644 index 0000000000..37bd9fea56 --- /dev/null +++ b/scheduler/scheduler_test.go @@ -0,0 +1,278 @@ +package scheduler + +import ( + "context" + "testing" + "time" + + "github.com/apache/arrow/go/v13/arrow" + "github.com/apache/arrow/go/v13/arrow/array" + "github.com/cloudquery/plugin-sdk/v4/scalar" + "github.com/cloudquery/plugin-sdk/v4/schema" + "github.com/google/uuid" + "github.com/rs/zerolog" +) + +type testExecutionClient struct { +} + +func (t *testExecutionClient) ID() string { + return "test" +} + +var _ schema.ClientMeta = &testExecutionClient{} + +var deterministicStableUUID = uuid.MustParse("c25355aab52c5b70a4e0c9991f5a3b87") +var randomStableUUID = uuid.MustParse("00000000000040008000000000000000") + +var testSyncTime = time.Now() + +func testResolverSuccess(_ context.Context, _ schema.ClientMeta, _ *schema.Resource, res chan<- any) error { + res <- map[string]any{ + "TestColumn": 3, + } + return nil +} + +func testResolverPanic(context.Context, schema.ClientMeta, *schema.Resource, chan<- any) error { + panic("Resolver") +} + +func testPreResourceResolverPanic(context.Context, schema.ClientMeta, *schema.Resource) error { + panic("PreResourceResolver") +} + +func testColumnResolverPanic(context.Context, schema.ClientMeta, *schema.Resource, schema.Column) error { + panic("ColumnResolver") +} + +func testTableSuccess() *schema.Table { + return &schema.Table{ + Name: "test_table_success", + Resolver: testResolverSuccess, + Columns: []schema.Column{ + { + Name: "test_column", + Type: arrow.PrimitiveTypes.Int64, + }, + }, + } +} + +func testTableSuccessWithPK() *schema.Table { + return &schema.Table{ + Name: "test_table_success", + Resolver: testResolverSuccess, + Columns: []schema.Column{ + { + Name: "test_column", + Type: arrow.PrimitiveTypes.Int64, + PrimaryKey: true, + }, + }, + } +} + +func testTableResolverPanic() *schema.Table { + return &schema.Table{ + Name: "test_table_resolver_panic", + Resolver: testResolverPanic, + Columns: []schema.Column{ + { + Name: "test_column", + Type: arrow.PrimitiveTypes.Int64, + }, + }, + } +} + +func testTablePreResourceResolverPanic() *schema.Table { + return &schema.Table{ + Name: "test_table_pre_resource_resolver_panic", + PreResourceResolver: testPreResourceResolverPanic, + Resolver: testResolverSuccess, + Columns: []schema.Column{ + { + Name: "test_column", + Type: arrow.PrimitiveTypes.Int64, + }, + }, + } +} + +func testTableColumnResolverPanic() *schema.Table { + return &schema.Table{ + Name: "test_table_column_resolver_panic", + Resolver: testResolverSuccess, + Columns: []schema.Column{ + { + Name: "test_column", + Type: arrow.PrimitiveTypes.Int64, + }, + { + Name: "test_column1", + Type: arrow.PrimitiveTypes.Int64, + Resolver: testColumnResolverPanic, + }, + }, + } +} + +func testTableRelationSuccess() *schema.Table { + return &schema.Table{ + Name: "test_table_relation_success", + Resolver: testResolverSuccess, + Columns: []schema.Column{ + { + Name: "test_column", + Type: arrow.PrimitiveTypes.Int64, + }, + }, + Relations: []*schema.Table{ + testTableSuccess(), + }, + } +} + +type syncTestCase struct { + table *schema.Table + data []scalar.Vector + deterministicCQID bool +} + +var syncTestCases = []syncTestCase{ + { + table: testTableSuccess(), + data: []scalar.Vector{ + { + &scalar.Int64{Value: 3, Valid: true}, + }, + }, + }, + { + table: testTableResolverPanic(), + data: nil, + }, + { + table: testTablePreResourceResolverPanic(), + data: nil, + }, + + { + table: testTableRelationSuccess(), + data: []scalar.Vector{ + { + &scalar.Int64{Value: 3, Valid: true}, + }, + { + &scalar.Int64{Value: 3, Valid: true}, + }, + }, + }, + { + table: testTableSuccess(), + data: []scalar.Vector{ + { + // &scalar.String{Value: "testSource", Valid: true}, + // &scalar.Timestamp{Value: testSyncTime, Valid: true}, + // &scalar.UUID{Value: randomStableUUID, Valid: true}, + // &scalar.UUID{}, + &scalar.Int64{Value: 3, Valid: true}, + }, + }, + deterministicCQID: true, + }, + { + table: testTableColumnResolverPanic(), + data: []scalar.Vector{ + { + &scalar.Int64{Value: 3, Valid: true}, + &scalar.Int64{}, + }, + }, + // deterministicCQID: true, + }, + { + table: testTableRelationSuccess(), + data: []scalar.Vector{ + { + // &scalar.String{Value: "testSource", Valid: true}, + // &scalar.Timestamp{Value: testSyncTime, Valid: true}, + // &scalar.UUID{Value: randomStableUUID, Valid: true}, + // &scalar.UUID{}, + &scalar.Int64{Value: 3, Valid: true}, + }, + { + // &scalar.String{Value: "testSource", Valid: true}, + // &scalar.Timestamp{Value: testSyncTime, Valid: true}, + // &scalar.UUID{Value: randomStableUUID, Valid: true}, + // &scalar.UUID{Value: randomStableUUID, Valid: true}, + &scalar.Int64{Value: 3, Valid: true}, + }, + }, + // deterministicCQID: true, + }, + { + table: testTableSuccessWithPK(), + data: []scalar.Vector{ + { + // &scalar.String{Value: "testSource", Valid: true}, + // &scalar.Timestamp{Value: testSyncTime, Valid: true}, + // &scalar.UUID{Value: deterministicStableUUID, Valid: true}, + // &scalar.UUID{}, + &scalar.Int64{Value: 3, Valid: true}, + }, + }, + // deterministicCQID: true, + }, +} + +func TestScheduler(t *testing.T) { + // uuid.SetRand(testRand{}) + for _, scheduler := range AllSchedulers { + for _, tc := range syncTestCases { + tc := tc + tc.table = tc.table.Copy(nil) + t.Run(tc.table.Name+"_"+scheduler.String(), func(t *testing.T) { + testSyncTable(t, tc, scheduler, tc.deterministicCQID) + }) + } + } +} + +func testSyncTable(t *testing.T, tc syncTestCase, strategy SchedulerStrategy, deterministicCQID bool) { + ctx := context.Background() + tables := []*schema.Table{ + tc.table, + } + c := testExecutionClient{} + opts := []Option{ + WithLogger(zerolog.New(zerolog.NewTestWriter(t))), + WithSchedulerStrategy(strategy), + // WithDeterministicCQId(deterministicCQID), + } + sc := NewScheduler(tables, &c, opts...) + records := make(chan arrow.Record, 10) + if err := sc.Sync(ctx, records); err != nil { + t.Fatal(err) + } + close(records) + + var i int + for record := range records { + if tc.data == nil { + t.Fatalf("Unexpected resource %v", record) + } + if i >= len(tc.data) { + t.Fatalf("expected %d resources. got %d", len(tc.data), i) + } + rec := tc.data[i].ToArrowRecord(record.Schema()) + if !array.RecordEqual(rec, record) { + t.Fatalf("expected at i=%d: %v. got %v", i, tc.data[i], record) + } + i++ + } + if len(tc.data) != i { + t.Fatalf("expected %d resources. got %d", len(tc.data), i) + } +} diff --git a/schema/resource.go b/schema/resource.go index e9d1f07da3..e55c31c262 100644 --- a/schema/resource.go +++ b/schema/resource.go @@ -97,6 +97,11 @@ func (r *Resource) CalculateCQID(deterministicCQID bool) error { } func (r *Resource) storeCQID(value uuid.UUID) error { + // We skeep if _cq_id is not present. + // Mostly the problem here is because the transformaiton step is baked into the the resolving step + if r.Table.Columns.Get(CqIDColumn.Name) == nil { + return nil + } b, err := value.MarshalBinary() if err != nil { return err diff --git a/serve/docs_test.go b/serve/docs_test.go index 9b65230168..296c9d438e 100644 --- a/serve/docs_test.go +++ b/serve/docs_test.go @@ -1 +1,20 @@ package serve + +import ( + "testing" + + "github.com/cloudquery/plugin-sdk/v4/internal/memdb" + "github.com/cloudquery/plugin-sdk/v4/plugin" +) + +func TestPluginDocs(t *testing.T) { + tmpDir := t.TempDir() + p := plugin.NewPlugin( + "testPlugin", + "v1.0.0", + memdb.NewMemDBClient) + srv := Plugin(p, WithArgs("doc", tmpDir), WithTestListener()) + if err := srv.newCmdPluginDoc().Execute(); err != nil { + t.Fatal(err) + } +} diff --git a/serve/plugin_test.go b/serve/plugin_test.go index e09308b704..d5357d1cb5 100644 --- a/serve/plugin_test.go +++ b/serve/plugin_test.go @@ -3,7 +3,6 @@ package serve import ( "bytes" "context" - "encoding/json" "io" "sync" "testing" @@ -11,67 +10,18 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/ipc" pb "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" + "github.com/cloudquery/plugin-sdk/v4/internal/memdb" "github.com/cloudquery/plugin-sdk/v4/plugin" "github.com/cloudquery/plugin-sdk/v4/schema" - "github.com/rs/zerolog" "google.golang.org/grpc" "google.golang.org/grpc/credentials/insecure" ) -type TestSourcePluginSpec struct { - Accounts []string `json:"accounts,omitempty" yaml:"accounts,omitempty"` -} - -type testExecutionClient struct { - plugin.UnimplementedSync - plugin.UnimplementedWriter - plugin.UnimplementedRead -} - -var _ schema.ClientMeta = &testExecutionClient{} - -// var errTestExecutionClientErr = fmt.Errorf("error in newTestExecutionClientErr") - -func testTable(name string) *schema.Table { - return &schema.Table{ - Name: name, - Resolver: func(ctx context.Context, meta schema.ClientMeta, parent *schema.Resource, res chan<- any) error { - res <- map[string]any{ - "TestColumn": 3, - } - return nil - }, - Columns: []schema.Column{ - { - Name: "test_column", - Type: arrow.PrimitiveTypes.Int64, - }, - }, - } -} - -func (*testExecutionClient) ID() string { - return "testExecutionClient" -} - -func (*testExecutionClient) Close(ctx context.Context) error { - return nil -} - -func (c *testExecutionClient) NewManagedSyncClient(ctx context.Context, options plugin.SyncOptions) (plugin.ManagedSyncClient, error) { - return c, nil -} - -func newTestExecutionClient(context.Context, zerolog.Logger, any) (plugin.Client, error) { - return &testExecutionClient{}, nil -} - -func TestPlugin(t *testing.T) { +func TestPluginServe(t *testing.T) { p := plugin.NewPlugin( "testPlugin", "v1.0.0", - newTestExecutionClient, - plugin.WithStaticTables([]*schema.Table{testTable("test_table"), testTable("test_table2")})) + memdb.NewMemDBClient) srv := Plugin(p, WithArgs("serve"), WithTestListener()) ctx := context.Background() ctx, cancel := context.WithCancel(ctx) @@ -110,7 +60,7 @@ func TestPlugin(t *testing.T) { t.Fatalf("Expected version to be v1.0.0 but got %s", getVersionResponse.Version) } - getTablesRes, err := c.GetStaticTables(ctx, &pb.GetStaticTables_Request{}) + getTablesRes, err := c.GetTables(ctx, &pb.GetTables_Request{}) if err != nil { t.Fatal(err) } @@ -127,19 +77,6 @@ func TestPlugin(t *testing.T) { t.Fatal(err) } - getTablesForSpecRes, err := c.GetDynamicTables(ctx, &pb.GetDynamicTables_Request{}) - if err != nil { - t.Fatal(err) - } - tables, err = schema.NewTablesFromBytes(getTablesForSpecRes.Tables) - if err != nil { - t.Fatal(err) - } - - if len(tables) != 1 { - t.Fatalf("Expected 1 table but got %d", len(tables)) - } - syncClient, err := c.Sync(ctx, &pb.Sync_Request{}) if err != nil { t.Fatal(err) @@ -183,28 +120,6 @@ func TestPlugin(t *testing.T) { t.Fatalf("Expected 1 resource on channel but got %d", totalResources) } - getMetricsRes, err := c.GetMetrics(ctx, &pb.GetMetrics_Request{}) - if err != nil { - t.Fatal(err) - } - var stats plugin.Metrics - if err := json.Unmarshal(getMetricsRes.Metrics, &stats); err != nil { - t.Fatal(err) - } - - clientStats := stats.TableClient[""][""] - if clientStats.Resources != 1 { - t.Fatalf("Expected 1 resource but got %d", clientStats.Resources) - } - - if clientStats.Errors != 0 { - t.Fatalf("Expected 0 errors but got %d", clientStats.Errors) - } - - if clientStats.Panics != 0 { - t.Fatalf("Expected 0 panics but got %d", clientStats.Panics) - } - cancel() wg.Wait() if serverErr != nil { diff --git a/serve/state_v3_test.go b/serve/state_v3_test.go.backup similarity index 100% rename from serve/state_v3_test.go rename to serve/state_v3_test.go.backup diff --git a/transformers/tables.go b/transformers/tables.go index 99b563e2e5..9ffbc3dd1f 100644 --- a/transformers/tables.go +++ b/transformers/tables.go @@ -1,7 +1,6 @@ package transformers import ( - "context" "fmt" "github.com/cloudquery/plugin-sdk/v4/schema" @@ -15,33 +14,6 @@ func setParents(tables schema.Tables, parent *schema.Table) { } } -// Add internal columns -func AddInternalColumns(tables []*schema.Table) error { - for _, table := range tables { - if c := table.Column("_cq_id"); c != nil { - return fmt.Errorf("table %s already has column _cq_id", table.Name) - } - cqID := schema.CqIDColumn - if len(table.PrimaryKeys()) == 0 { - cqID.PrimaryKey = true - } - cqSourceName := schema.CqSourceNameColumn - cqSyncTime := schema.CqSyncTimeColumn - cqSourceName.Resolver = func(_ context.Context, _ schema.ClientMeta, resource *schema.Resource, c schema.Column) error { - return resource.Set(c.Name, p.sourceName) - } - cqSyncTime.Resolver = func(_ context.Context, _ schema.ClientMeta, resource *schema.Resource, c schema.Column) error { - return resource.Set(c.Name, p.syncTime) - } - - table.Columns = append([]schema.Column{cqSourceName, cqSyncTime, cqID, schema.CqParentIDColumn}, table.Columns...) - if err := AddInternalColumns(table.Relations); err != nil { - return err - } - } - return nil -} - // Apply transformations to tables func TransformTables(tables schema.Tables) error { for _, table := range tables { From 2287642eaa7ccd8f26939b1b9df4e95965d7f234 Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Mon, 12 Jun 2023 23:05:35 +0300 Subject: [PATCH 016/125] more wip --- internal/memdb/memdb.go | 113 +++++---- internal/memdb/memdb_test.go | 210 +++++++--------- .../servers/destination/v0/destinations.go | 109 ++++---- .../servers/destination/v1/destinations.go | 87 ++++--- internal/servers/plugin/v3/plugin.go | 232 ++++++++---------- internal/servers/plugin/v3/state.go | 46 ++-- plugin/messages.go | 79 ++---- plugin/options.go | 16 -- plugin/plugin.go | 6 + plugin/plugin_reader.go | 17 +- plugin/plugin_test.go | 91 +++---- plugin/plugin_writer.go | 4 +- ...testing_sync.go => testing_sync.go.backup} | 0 plugin/testing_upsert.go | 11 +- plugin/testing_write.go | 54 ++-- plugin/testing_write_delete.go | 32 +-- plugin/testing_write_insert.go | 13 +- plugin/testing_write_migrate.go | 12 +- plugin/testing_write_upsert.go | 69 ------ scheduler/scheduler.go | 50 ++-- scheduler/scheduler_dfs.go | 2 +- scheduler/scheduler_round_robin.go | 2 +- scheduler/scheduler_test.go | 29 +-- schema/arrow.go | 37 ++- schema/table.go | 19 ++ serve/destination_v0_test.go | 18 +- serve/destination_v1_test.go | 21 +- serve/plugin_test.go | 65 ++++- transformers/tables.go | 4 +- writers/batch_test.go | 2 +- 30 files changed, 715 insertions(+), 735 deletions(-) rename plugin/{testing_sync.go => testing_sync.go.backup} (100%) delete mode 100644 plugin/testing_write_upsert.go diff --git a/internal/memdb/memdb.go b/internal/memdb/memdb.go index a23316939b..ca202a82be 100644 --- a/internal/memdb/memdb.go +++ b/internal/memdb/memdb.go @@ -4,7 +4,6 @@ import ( "context" "fmt" "sync" - "time" "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/array" @@ -22,21 +21,21 @@ type client struct { blockingWrite bool } -type MemDBOption func(*client) +type Option func(*client) -func WithErrOnWrite() MemDBOption { +func WithErrOnWrite() Option { return func(c *client) { c.errOnWrite = true } } -func WithBlockingWrite() MemDBOption { +func WithBlockingWrite() Option { return func(c *client) { c.blockingWrite = true } } -func GetNewClient(options ...MemDBOption) plugin.NewClientFunc { +func GetNewClient(options ...Option) plugin.NewClientFunc { c := &client{ memoryDB: make(map[string][]arrow.Record), memoryDBLock: sync.RWMutex{}, @@ -56,7 +55,7 @@ func NewMemDBClient(_ context.Context, _ zerolog.Logger, spec any) (plugin.Clien }, nil } -func NewMemDBClientErrOnNew(context.Context, zerolog.Logger, []byte) (plugin.Client, error) { +func NewMemDBClientErrOnNew(context.Context, zerolog.Logger, any) (plugin.Client, error) { return nil, fmt.Errorf("newTestDestinationMemDBClientErrOnNew") } @@ -85,11 +84,18 @@ func (c *client) ID() string { return "testDestinationMemDB" } -func (c *client) Sync(ctx context.Context, options plugin.SyncOptions, res chan<- arrow.Record) error { +func (c *client) Sync(ctx context.Context, options plugin.SyncOptions, res chan<- plugin.Message) error { c.memoryDBLock.RLock() + for tableName := range c.memoryDB { + if !plugin.IsTable(tableName, options.Tables, options.SkipTables) { + continue + } for _, row := range c.memoryDB[tableName] { - res <- row + res <- &plugin.MessageInsert{ + Record: row, + Upsert: false, + } } } c.memoryDBLock.RUnlock() @@ -104,28 +110,25 @@ func (c *client) Tables(ctx context.Context) (schema.Tables, error) { return tables, nil } -func (c *client) Migrate(_ context.Context, tables schema.Tables, options plugin.MigrateOptions) error { - for _, table := range tables { - tableName := table.Name - memTable := c.memoryDB[tableName] - if memTable == nil { - c.memoryDB[tableName] = make([]arrow.Record, 0) - c.tables[tableName] = table - continue - } - - changes := table.GetChanges(c.tables[tableName]) - // memdb doesn't support any auto-migrate - if changes == nil { - continue - } +func (c *client) migrate(_ context.Context, table *schema.Table) { + tableName := table.Name + memTable := c.memoryDB[tableName] + if memTable == nil { c.memoryDB[tableName] = make([]arrow.Record, 0) c.tables[tableName] = table + return } - return nil + + changes := table.GetChanges(c.tables[tableName]) + // memdb doesn't support any auto-migrate + if changes == nil { + return + } + c.memoryDB[tableName] = make([]arrow.Record, 0) + c.tables[tableName] = table } -func (c *client) Write(ctx context.Context, options plugin.WriteOptions, resources <-chan arrow.Record) error { +func (c *client) Write(ctx context.Context, options plugin.WriteOptions, msgs <-chan plugin.Message) error { if c.errOnWrite { return fmt.Errorf("errOnWrite") } @@ -137,19 +140,28 @@ func (c *client) Write(ctx context.Context, options plugin.WriteOptions, resourc return nil } - for resource := range resources { + for msg := range msgs { c.memoryDBLock.Lock() - sc := resource.Schema() - tableName, ok := sc.Metadata().GetValue(schema.MetadataTableName) - if !ok { - return fmt.Errorf("table name not found in schema metadata") - } - table := c.tables[tableName] - if options.WriteMode == plugin.WriteModeAppend { - c.memoryDB[tableName] = append(c.memoryDB[tableName], resource) - } else { - c.overwrite(table, resource) + + switch msg := msg.(type) { + case *plugin.MessageCreateTable: + c.migrate(ctx, msg.Table) + case *plugin.MessageDeleteStale: + c.deleteStale(ctx, msg) + case *plugin.MessageInsert: + sc := msg.Record.Schema() + tableName, ok := sc.Metadata().GetValue(schema.MetadataTableName) + if !ok { + return fmt.Errorf("table name not found in schema metadata") + } + table := c.tables[tableName] + if msg.Upsert { + c.overwrite(table, msg.Record) + } else { + c.memoryDB[tableName] = append(c.memoryDB[tableName], msg.Record) + } } + c.memoryDBLock.Unlock() } return nil @@ -160,22 +172,25 @@ func (c *client) Close(context.Context) error { return nil } -func (c *client) DeleteStale(ctx context.Context, tables schema.Tables, source string, syncTime time.Time) error { - for _, table := range tables { - c.deleteStaleTable(ctx, table, source, syncTime) - } - return nil -} - -func (c *client) deleteStaleTable(_ context.Context, table *schema.Table, source string, syncTime time.Time) { - sourceColIndex := table.Columns.Index(schema.CqSourceNameColumn.Name) - syncColIndex := table.Columns.Index(schema.CqSyncTimeColumn.Name) - tableName := table.Name +func (c *client) deleteStale(_ context.Context, msg *plugin.MessageDeleteStale) { var filteredTable []arrow.Record + tableName := msg.Table.Name for i, row := range c.memoryDB[tableName] { - if row.Column(sourceColIndex).(*array.String).Value(0) == source { + sc := row.Schema() + indices := sc.FieldIndices(schema.CqSourceNameColumn.Name) + if len(indices) == 0 { + continue + } + sourceColIndex := indices[0] + indices = sc.FieldIndices(schema.CqSyncTimeColumn.Name) + if len(indices) == 0 { + continue + } + syncColIndex := indices[0] + + if row.Column(sourceColIndex).(*array.String).Value(0) == msg.SourceName { rowSyncTime := row.Column(syncColIndex).(*array.Timestamp).Value(0).ToTime(arrow.Microsecond).UTC() - if !rowSyncTime.Before(syncTime) { + if !rowSyncTime.Before(msg.SyncTime) { filteredTable = append(filteredTable, c.memoryDB[tableName][i]) } } diff --git a/internal/memdb/memdb_test.go b/internal/memdb/memdb_test.go index e04a23bd1a..44a95c6b06 100644 --- a/internal/memdb/memdb_test.go +++ b/internal/memdb/memdb_test.go @@ -3,137 +3,107 @@ package memdb import ( "context" "testing" - "time" - "github.com/apache/arrow/go/v13/arrow" - pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" "github.com/cloudquery/plugin-sdk/v4/plugin" - "github.com/cloudquery/plugin-sdk/v4/schema" - "github.com/google/uuid" ) -var migrateStrategyOverwrite = plugin.MigrateStrategy{ - AddColumn: plugin.MigrateModeForce, - AddColumnNotNull: plugin.MigrateModeForce, - RemoveColumn: plugin.MigrateModeForce, - RemoveColumnNotNull: plugin.MigrateModeForce, - ChangeColumn: plugin.MigrateModeForce, -} - -var migrateStrategyAppend = plugin.MigrateStrategy{ - AddColumn: plugin.MigrateModeForce, - AddColumnNotNull: plugin.MigrateModeForce, - RemoveColumn: plugin.MigrateModeForce, - RemoveColumnNotNull: plugin.MigrateModeForce, - ChangeColumn: plugin.MigrateModeForce, -} - -func TestPluginUnmanagedClient(t *testing.T) { +func TestPlugin(t *testing.T) { + ctx := context.Background() + p := plugin.NewPlugin("test", "development", NewMemDBClient) + if err := p.Init(ctx, nil); err != nil { + t.Fatal(err) + } plugin.PluginTestSuiteRunner( t, - func() *plugin.Plugin { - return plugin.NewPlugin("test", "development", NewMemDBClient) - }, - nil, + p, plugin.PluginTestSuiteTests{ - MigrateStrategyOverwrite: migrateStrategyOverwrite, - MigrateStrategyAppend: migrateStrategyAppend, - }, - ) -} - -func TestPluginManagedClientWithCQPKs(t *testing.T) { - plugin.PluginTestSuiteRunner(t, - func() *plugin.Plugin { - return plugin.NewPlugin("test", "development", NewMemDBClient) - }, - pbPlugin.Spec{ - WriteSpec: &pbPlugin.WriteSpec{ - PkMode: pbPlugin.WriteSpec_CQ_ID_ONLY, + MigrateStrategy: plugin.MigrateStrategy{ + AddColumn: plugin.MigrateModeForce, + AddColumnNotNull: plugin.MigrateModeForce, + RemoveColumn: plugin.MigrateModeForce, + RemoveColumnNotNull: plugin.MigrateModeForce, + ChangeColumn: plugin.MigrateModeForce, }, }, - plugin.PluginTestSuiteTests{ - MigrateStrategyOverwrite: migrateStrategyOverwrite, - MigrateStrategyAppend: migrateStrategyAppend, - }) + ) } -func TestPluginOnNewError(t *testing.T) { - ctx := context.Background() - p := plugin.NewPlugin("test", "development", NewMemDBClientErrOnNew) - err := p.Init(ctx, nil) +// func TestPluginOnNewError(t *testing.T) { +// ctx := context.Background() +// p := plugin.NewPlugin("test", "development", NewMemDBClientErrOnNew) +// err := p.Init(ctx, nil) - if err == nil { - t.Fatal("expected error") - } -} +// if err == nil { +// t.Fatal("expected error") +// } +// } -func TestOnWriteError(t *testing.T) { - ctx := context.Background() - newClientFunc := GetNewClient(WithErrOnWrite()) - p := plugin.NewPlugin("test", "development", newClientFunc) - if err := p.Init(ctx, nil); err != nil { - t.Fatal(err) - } - table := schema.TestTable("test", schema.TestSourceOptions{}) - tables := schema.Tables{ - table, - } - sourceName := "TestDestinationOnWriteError" - syncTime := time.Now() - sourceSpec := pbPlugin.Spec{ - Name: sourceName, - } - ch := make(chan arrow.Record, 1) - opts := schema.GenTestDataOptions{ - SourceName: "test", - SyncTime: time.Now(), - MaxRows: 1, - StableUUID: uuid.Nil, - } - record := schema.GenTestData(table, opts)[0] - ch <- record - close(ch) - err := p.Write(ctx, sourceSpec, tables, syncTime, ch) - if err == nil { - t.Fatal("expected error") - } - if err.Error() != "errOnWrite" { - t.Fatalf("expected errOnWrite, got %s", err.Error()) - } -} +// func TestOnWriteError(t *testing.T) { +// ctx := context.Background() +// newClientFunc := GetNewClient(WithErrOnWrite()) +// p := plugin.NewPlugin("test", "development", newClientFunc) +// if err := p.Init(ctx, nil); err != nil { +// t.Fatal(err) +// } +// table := schema.TestTable("test", schema.TestSourceOptions{}) +// tables := schema.Tables{ +// table, +// } +// sourceName := "TestDestinationOnWriteError" +// syncTime := time.Now() +// sourceSpec := pbPlugin.Spec{ +// Name: sourceName, +// } +// ch := make(chan arrow.Record, 1) +// opts := schema.GenTestDataOptions{ +// SourceName: "test", +// SyncTime: time.Now(), +// MaxRows: 1, +// StableUUID: uuid.Nil, +// } +// record := schema.GenTestData(table, opts)[0] +// ch <- record +// close(ch) +// err := p.Write(ctx, sourceSpec, tables, syncTime, ch) +// if err == nil { +// t.Fatal("expected error") +// } +// if err.Error() != "errOnWrite" { +// t.Fatalf("expected errOnWrite, got %s", err.Error()) +// } +// } -func TestOnWriteCtxCancelled(t *testing.T) { - ctx := context.Background() - newClientFunc := GetNewClient(WithBlockingWrite()) - p := plugin.NewPlugin("test", "development", newClientFunc) - if err := p.Init(ctx, pbPlugin.Spec{ - WriteSpec: &pbPlugin.WriteSpec{}, - }); err != nil { - t.Fatal(err) - } - table := schema.TestTable("test", schema.TestSourceOptions{}) - tables := schema.Tables{ - table, - } - sourceName := "TestDestinationOnWriteError" - syncTime := time.Now() - sourceSpec := pbPlugin.Spec{ - Name: sourceName, - } - ch := make(chan arrow.Record, 1) - ctx, cancel := context.WithTimeout(ctx, 2*time.Second) - opts := schema.GenTestDataOptions{ - SourceName: "test", - SyncTime: time.Now(), - MaxRows: 1, - StableUUID: uuid.Nil, - } - record := schema.GenTestData(table, opts)[0] - ch <- record - defer cancel() - err := p.Write(ctx, sourceSpec, tables, syncTime, ch) - if err != nil { - t.Fatal(err) - } -} +// func TestOnWriteCtxCancelled(t *testing.T) { +// ctx := context.Background() +// newClientFunc := GetNewClient(WithBlockingWrite()) +// p := plugin.NewPlugin("test", "development", newClientFunc) +// if err := p.Init(ctx, pbPlugin.Spec{ +// WriteSpec: &pbPlugin.WriteSpec{}, +// }); err != nil { +// t.Fatal(err) +// } +// table := schema.TestTable("test", schema.TestSourceOptions{}) +// tables := schema.Tables{ +// table, +// } +// sourceName := "TestDestinationOnWriteError" +// syncTime := time.Now() +// sourceSpec := pbPlugin.Spec{ +// Name: sourceName, +// } +// ch := make(chan arrow.Record, 1) +// ctx, cancel := context.WithTimeout(ctx, 2*time.Second) +// opts := schema.GenTestDataOptions{ +// SourceName: "test", +// SyncTime: time.Now(), +// MaxRows: 1, +// StableUUID: uuid.Nil, +// } +// record := schema.GenTestData(table, opts)[0] +// ch <- record +// defer cancel() +// err := p.Write(ctx, sourceSpec, tables, syncTime, ch) +// if err != nil { +// t.Fatal(err) +// } +// } diff --git a/internal/servers/destination/v0/destinations.go b/internal/servers/destination/v0/destinations.go index 4c22750e69..ad5506e161 100644 --- a/internal/servers/destination/v0/destinations.go +++ b/internal/servers/destination/v0/destinations.go @@ -4,8 +4,9 @@ import ( "context" "encoding/json" "io" + "sync" - "github.com/apache/arrow/go/v13/arrow" + "github.com/apache/arrow/go/v13/arrow/array" "github.com/apache/arrow/go/v13/arrow/memory" pbBase "github.com/cloudquery/plugin-pb-go/pb/base/v0" pb "github.com/cloudquery/plugin-pb-go/pb/destination/v0" @@ -21,11 +22,9 @@ import ( type Server struct { pb.UnimplementedDestinationServer - Plugin *plugin.Plugin - Logger zerolog.Logger - spec specs.Destination - writeMode plugin.WriteMode - migrateMode plugin.MigrateMode + Plugin *plugin.Plugin + Logger zerolog.Logger + spec specs.Destination } func (*Server) GetProtocolVersion(context.Context, *pbBase.GetProtocolVersion_Request) (*pbBase.GetProtocolVersion_Response, error) { @@ -40,20 +39,6 @@ func (s *Server) Configure(ctx context.Context, req *pbBase.Configure_Request) ( return nil, status.Errorf(codes.InvalidArgument, "failed to unmarshal spec: %v", err) } s.spec = spec - switch s.spec.WriteMode { - case specs.WriteModeAppend: - s.writeMode = plugin.WriteModeAppend - case specs.WriteModeOverwrite: - s.writeMode = plugin.WriteModeOverwrite - case specs.WriteModeOverwriteDeleteStale: - s.writeMode = plugin.WriteModeOverwriteDeleteStale - } - switch s.spec.MigrateMode { - case specs.MigrateModeSafe: - s.migrateMode = plugin.MigrateModeSafe - case specs.MigrateModeForced: - s.migrateMode = plugin.MigrateModeForce - } return &pbBase.Configure_Response{}, s.Plugin.Init(ctx, nil) } @@ -77,17 +62,22 @@ func (s *Server) Migrate(ctx context.Context, req *pb.Migrate_Request) (*pb.Migr tables := TablesV2ToV3(tablesV2).FlattenTables() SetDestinationManagedCqColumns(tables) s.setPKsForTables(tables) - - var migrateMode plugin.MigrateMode - switch s.spec.MigrateMode { - case specs.MigrateModeSafe: - migrateMode = plugin.MigrateModeSafe - case specs.MigrateModeForced: - migrateMode = plugin.MigrateModeForce - default: - return nil, status.Errorf(codes.InvalidArgument, "invalid migrate mode: %v", s.spec.MigrateMode) + writeCh := make(chan plugin.Message) + eg, ctx := errgroup.WithContext(ctx) + eg.Go(func() error { + return s.Plugin.Write(ctx, plugin.WriteOptions{}, writeCh) + }) + for _, table := range tables { + writeCh <- &plugin.MessageCreateTable{ + Table: table, + MigrateForce: s.spec.MigrateMode == specs.MigrateModeForced, + } + } + close(writeCh) + if err := eg.Wait(); err != nil { + return nil, status.Errorf(codes.Internal, "failed to write: %v", err) } - return &pb.Migrate_Response{}, s.Plugin.Migrate(ctx, tables, migrateMode) + return &pb.Migrate_Response{}, nil } func (*Server) Write(pb.Destination_WriteServer) error { @@ -97,7 +87,7 @@ func (*Server) Write(pb.Destination_WriteServer) error { // Note the order of operations in this method is important! // Trying to insert into the `resources` channel before starting the reader goroutine will cause a deadlock. func (s *Server) Write2(msg pb.Destination_Write2Server) error { - resources := make(chan arrow.Record) + msgs := make(chan plugin.Message) r, err := msg.Recv() if err != nil { @@ -126,10 +116,18 @@ func (s *Server) Write2(msg pb.Destination_Write2Server) error { SetDestinationManagedCqColumns(tables) s.setPKsForTables(tables) eg, ctx := errgroup.WithContext(msg.Context()) - sourceName := r.Source + // sourceName := r.Source eg.Go(func() error { - return s.Plugin.Write(ctx, sourceName, tables, syncTime, s.writeMode, resources) + return s.Plugin.Write(ctx, plugin.WriteOptions{}, msgs) }) + + for _, table := range tables { + msgs <- &plugin.MessageCreateTable{ + Table: table, + MigrateForce: s.spec.MigrateMode == specs.MigrateModeForced, + } + } + sourceColumn := &schemav2.Text{} _ = sourceColumn.Set(sourceSpec.Name) syncTimeColumn := &schemav2.Timestamptz{} @@ -138,30 +136,32 @@ func (s *Server) Write2(msg pb.Destination_Write2Server) error { for { r, err := msg.Recv() if err == io.EOF { - close(resources) + close(msgs) if err := eg.Wait(); err != nil { return status.Errorf(codes.Internal, "write failed: %v", err) } return msg.SendAndClose(&pb.Write2_Response{}) } if err != nil { - close(resources) + close(msgs) if wgErr := eg.Wait(); wgErr != nil { return status.Errorf(codes.Internal, "failed to receive msg: %v and write failed: %v", err, wgErr) } return status.Errorf(codes.Internal, "failed to receive msg: %v", err) } + var origResource schemav2.DestinationResource if err := json.Unmarshal(r.Resource, &origResource); err != nil { - close(resources) + close(msgs) if wgErr := eg.Wait(); wgErr != nil { return status.Errorf(codes.InvalidArgument, "failed to unmarshal resource: %v and write failed: %v", err, wgErr) } return status.Errorf(codes.InvalidArgument, "failed to unmarshal resource: %v", err) } + table := tables.Get(origResource.TableName) if table == nil { - close(resources) + close(msgs) if wgErr := eg.Wait(); wgErr != nil { return status.Errorf(codes.InvalidArgument, "failed to get table: %s and write failed: %v", origResource.TableName, wgErr) } @@ -173,11 +173,15 @@ func (s *Server) Write2(msg pb.Destination_Write2Server) error { origResource.Data = append([]schemav2.CQType{sourceColumn, syncTimeColumn}, origResource.Data...) } convertedResource := CQTypesToRecord(memory.DefaultAllocator, []schemav2.CQTypes{origResource.Data}, table.ToArrowSchema()) + msg := &plugin.MessageInsert{ + Record: convertedResource, + Upsert: s.spec.WriteMode == specs.WriteModeOverwrite || s.spec.WriteMode == specs.WriteModeOverwriteDeleteStale, + } + select { - case resources <- convertedResource: + case msgs <- msg: case <-ctx.Done(): - convertedResource.Release() - close(resources) + close(msgs) if err := eg.Wait(); err != nil { return status.Errorf(codes.Internal, "Context done: %v and failed to wait for plugin: %v", ctx.Err(), err) } @@ -228,11 +232,28 @@ func (s *Server) DeleteStale(ctx context.Context, req *pb.DeleteStale_Request) ( } tables := TablesV2ToV3(tablesV2).FlattenTables() SetDestinationManagedCqColumns(tables) - if err := s.Plugin.DeleteStale(ctx, tables, req.Source, req.Timestamp.AsTime()); err != nil { - return nil, err - } - return &pb.DeleteStale_Response{}, nil + msgs := make(chan plugin.Message) + var writeErr error + var wg sync.WaitGroup + wg.Add(1) + go func() { + defer wg.Done() + writeErr = s.Plugin.Write(ctx, plugin.WriteOptions{}, msgs) + }() + for _, table := range tables { + bldr := array.NewRecordBuilder(memory.DefaultAllocator, table.ToArrowSchema()) + bldr.Field(table.Columns.Index(schema.CqSourceNameColumn.Name)).(*array.StringBuilder).Append(req.Source) + bldr.Field(table.Columns.Index(schema.CqSyncTimeColumn.Name)).(*array.TimestampBuilder).AppendTime(req.Timestamp.AsTime()) + msgs <- &plugin.MessageDeleteStale{ + Table: table, + SourceName: req.Source, + SyncTime: req.Timestamp.AsTime(), + } + } + close(msgs) + wg.Wait() + return &pb.DeleteStale_Response{}, writeErr } func (s *Server) setPKsForTables(tables schema.Tables) { diff --git a/internal/servers/destination/v1/destinations.go b/internal/servers/destination/v1/destinations.go index 45cfa4f7f7..0bfdb886ca 100644 --- a/internal/servers/destination/v1/destinations.go +++ b/internal/servers/destination/v1/destinations.go @@ -5,9 +5,11 @@ import ( "context" "encoding/json" "io" + "sync" - "github.com/apache/arrow/go/v13/arrow" + "github.com/apache/arrow/go/v13/arrow/array" "github.com/apache/arrow/go/v13/arrow/ipc" + "github.com/apache/arrow/go/v13/arrow/memory" pb "github.com/cloudquery/plugin-pb-go/pb/destination/v1" "github.com/cloudquery/plugin-pb-go/specs/v0" "github.com/cloudquery/plugin-sdk/v4/plugin" @@ -23,7 +25,6 @@ type Server struct { Plugin *plugin.Plugin Logger zerolog.Logger spec specs.Destination - writeMode plugin.WriteMode migrateMode plugin.MigrateMode } @@ -33,20 +34,6 @@ func (s *Server) Configure(ctx context.Context, req *pb.Configure_Request) (*pb. return nil, status.Errorf(codes.InvalidArgument, "failed to unmarshal spec: %v", err) } s.spec = spec - switch s.spec.WriteMode { - case specs.WriteModeAppend: - s.writeMode = plugin.WriteModeAppend - case specs.WriteModeOverwrite: - s.writeMode = plugin.WriteModeOverwrite - case specs.WriteModeOverwriteDeleteStale: - s.writeMode = plugin.WriteModeOverwriteDeleteStale - } - switch s.spec.MigrateMode { - case specs.MigrateModeSafe: - s.migrateMode = plugin.MigrateModeSafe - case specs.MigrateModeForced: - s.migrateMode = plugin.MigrateModeForce - } return &pb.Configure_Response{}, s.Plugin.Init(ctx, s.spec.Spec) } @@ -73,13 +60,28 @@ func (s *Server) Migrate(ctx context.Context, req *pb.Migrate_Request) (*pb.Migr } s.setPKsForTables(tables) - return &pb.Migrate_Response{}, s.Plugin.Migrate(ctx, tables, s.migrateMode) + writeCh := make(chan plugin.Message) + eg, ctx := errgroup.WithContext(ctx) + eg.Go(func() error { + return s.Plugin.Write(ctx, plugin.WriteOptions{}, writeCh) + }) + for _, table := range tables { + writeCh <- &plugin.MessageCreateTable{ + Table: table, + MigrateForce: s.migrateMode == plugin.MigrateModeForce, + } + } + close(writeCh) + if err := eg.Wait(); err != nil { + return nil, status.Errorf(codes.Internal, "failed to write: %v", err) + } + return &pb.Migrate_Response{}, nil } // Note the order of operations in this method is important! // Trying to insert into the `resources` channel before starting the reader goroutine will cause a deadlock. func (s *Server) Write(msg pb.Destination_WriteServer) error { - resources := make(chan arrow.Record) + msgs := make(chan plugin.Message) r, err := msg.Recv() if err != nil { @@ -108,26 +110,31 @@ func (s *Server) Write(msg pb.Destination_WriteServer) error { return status.Errorf(codes.InvalidArgument, "failed to unmarshal source spec: %v", err) } } - syncTime := r.Timestamp.AsTime() s.setPKsForTables(tables) eg, ctx := errgroup.WithContext(msg.Context()) - sourceName := r.Source eg.Go(func() error { - return s.Plugin.Write(ctx, sourceName, tables, syncTime, s.writeMode, resources) + return s.Plugin.Write(ctx, plugin.WriteOptions{}, msgs) }) + for _, table := range tables { + msgs <- &plugin.MessageCreateTable{ + Table: table, + MigrateForce: s.spec.MigrateMode == specs.MigrateModeForced, + } + } + for { r, err := msg.Recv() if err == io.EOF { - close(resources) + close(msgs) if err := eg.Wait(); err != nil { return status.Errorf(codes.Internal, "write failed: %v", err) } return msg.SendAndClose(&pb.Write_Response{}) } if err != nil { - close(resources) + close(msgs) if wgErr := eg.Wait(); wgErr != nil { return status.Errorf(codes.Internal, "failed to receive msg: %v and write failed: %v", err, wgErr) } @@ -135,7 +142,7 @@ func (s *Server) Write(msg pb.Destination_WriteServer) error { } rdr, err := ipc.NewReader(bytes.NewReader(r.Resource)) if err != nil { - close(resources) + close(msgs) if wgErr := eg.Wait(); wgErr != nil { return status.Errorf(codes.InvalidArgument, "failed to create reader: %v and write failed: %v", err, wgErr) } @@ -144,10 +151,14 @@ func (s *Server) Write(msg pb.Destination_WriteServer) error { for rdr.Next() { rec := rdr.Record() rec.Retain() + msg := &plugin.MessageInsert{ + Record: rec, + Upsert: s.spec.WriteMode == specs.WriteModeOverwrite || s.spec.WriteMode == specs.WriteModeOverwriteDeleteStale, + } select { - case resources <- rec: + case msgs <- msg: case <-ctx.Done(): - close(resources) + close(msgs) if err := eg.Wait(); err != nil { return status.Errorf(codes.Internal, "Context done: %v and failed to wait for plugin: %v", ctx.Err(), err) } @@ -190,11 +201,27 @@ func (s *Server) DeleteStale(ctx context.Context, req *pb.DeleteStale_Request) ( return nil, status.Errorf(codes.InvalidArgument, "failed to create tables: %v", err) } - if err := s.Plugin.DeleteStale(ctx, tables, req.Source, req.Timestamp.AsTime()); err != nil { - return nil, err + msgs := make(chan plugin.Message) + var writeErr error + var wg sync.WaitGroup + wg.Add(1) + go func() { + defer wg.Done() + writeErr = s.Plugin.Write(ctx, plugin.WriteOptions{}, msgs) + }() + for _, table := range tables { + bldr := array.NewRecordBuilder(memory.DefaultAllocator, table.ToArrowSchema()) + bldr.Field(table.Columns.Index(schema.CqSourceNameColumn.Name)).(*array.StringBuilder).Append(req.Source) + bldr.Field(table.Columns.Index(schema.CqSyncTimeColumn.Name)).(*array.TimestampBuilder).AppendTime(req.Timestamp.AsTime()) + msgs <- &plugin.MessageDeleteStale{ + Table: table, + SourceName: req.Source, + SyncTime: req.Timestamp.AsTime(), + } } - - return &pb.DeleteStale_Response{}, nil + close(msgs) + wg.Wait() + return &pb.DeleteStale_Response{}, writeErr } func (s *Server) setPKsForTables(tables schema.Tables) { diff --git a/internal/servers/plugin/v3/plugin.go b/internal/servers/plugin/v3/plugin.go index 92c7c27cc1..64e166e9ec 100644 --- a/internal/servers/plugin/v3/plugin.go +++ b/internal/servers/plugin/v3/plugin.go @@ -1,14 +1,12 @@ package plugin import ( - "bytes" "context" "errors" "fmt" "io" "github.com/apache/arrow/go/v13/arrow" - "github.com/apache/arrow/go/v13/arrow/ipc" "github.com/cloudquery/plugin-pb-go/managedplugin" pb "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" "github.com/cloudquery/plugin-sdk/v4/plugin" @@ -19,6 +17,7 @@ import ( "google.golang.org/grpc/codes" "google.golang.org/grpc/status" "google.golang.org/protobuf/proto" + "google.golang.org/protobuf/types/known/timestamppb" ) const MaxMsgSize = 100 * 1024 * 1024 // 100 MiB @@ -31,9 +30,12 @@ type Server struct { NoSentry bool } -func (s *Server) GetTables(context.Context, *pb.GetTables_Request) (*pb.GetTables_Response, error) { - tables := s.Plugin.Tables().ToArrowSchemas() - encoded, err := tables.Encode() +func (s *Server) GetTables(ctx context.Context, _ *pb.GetTables_Request) (*pb.GetTables_Response, error) { + tables, err := s.Plugin.Tables(ctx) + if err != nil { + return nil, status.Errorf(codes.Internal, "failed to get tables: %v", err) + } + encoded, err := tables.ToArrowSchemas().Encode() if err != nil { return nil, fmt.Errorf("failed to encode tables: %w", err) } @@ -62,7 +64,7 @@ func (s *Server) Init(ctx context.Context, req *pb.Init_Request) (*pb.Init_Respo } func (s *Server) Sync(req *pb.Sync_Request, stream pb.Plugin_SyncServer) error { - records := make(chan arrow.Record) + msgs := make(chan plugin.Message) var syncErr error ctx := stream.Context() @@ -72,8 +74,6 @@ func (s *Server) Sync(req *pb.Sync_Request, stream pb.Plugin_SyncServer) error { Concurrency: req.Concurrency, } - // sourceName := req.SourceName - if req.StateBackend != nil { opts := []managedplugin.Option{ managedplugin.WithLogger(s.Logger), @@ -90,51 +90,69 @@ func (s *Server) Sync(req *pb.Sync_Request, stream pb.Plugin_SyncServer) error { if err != nil { return status.Errorf(codes.Internal, "failed to create state plugin: %v", err) } - stateClient, err := newStateClient(ctx, statePlugin.Conn, *req.StateBackend) + stateClient, err := newStateClient(ctx, statePlugin.Conn, req.StateBackend) if err != nil { return status.Errorf(codes.Internal, "failed to create state client: %v", err) } syncOptions.StateBackend = stateClient } - if req.SyncTime != nil { - syncOptions.SyncTime = req.SyncTime.AsTime() - } - - if req.SourceName != "" { - syncOptions.SourceName = req.SourceName - } go func() { - defer close(records) - err := s.Plugin.Sync(ctx, syncOptions, records) + defer close(msgs) + err := s.Plugin.Sync(ctx, syncOptions, msgs) if err != nil { syncErr = fmt.Errorf("failed to sync records: %w", err) } }() - for rec := range records { - var buf bytes.Buffer - w := ipc.NewWriter(&buf, ipc.WithSchema(rec.Schema())) - if err := w.Write(rec); err != nil { - return status.Errorf(codes.Internal, "failed to write record: %v", err) - } - if err := w.Close(); err != nil { - return status.Errorf(codes.Internal, "failed to close writer: %v", err) + pbMsg := &pb.Sync_Response{} + for msg := range msgs { + switch m := msg.(type) { + case *plugin.MessageCreateTable: + m.Table.ToArrowSchema() + pbMsg.Message = &pb.Sync_Response_CreateTable{ + CreateTable: &pb.MessageCreateTable{ + Table: nil, + MigrateForce: m.MigrateForce, + }, + } + case *plugin.MessageInsert: + recordBytes, err := schema.RecordToBytes(m.Record) + if err != nil { + return status.Errorf(codes.Internal, "failed to encode record: %v", err) + } + pbMsg.Message = &pb.Sync_Response_Insert{ + Insert: &pb.MessageInsert{ + Record: recordBytes, + Upsert: m.Upsert, + }, + } + case *plugin.MessageDeleteStale: + tableBytes, err := m.Table.ToArrowSchemaBytes() + if err != nil { + return status.Errorf(codes.Internal, "failed to encode record: %v", err) + } + pbMsg.Message = &pb.Sync_Response_Delete{ + Delete: &pb.MessageDeleteStale{ + Table: tableBytes, + SourceName: m.SourceName, + SyncTime: timestamppb.New(m.SyncTime), + }, + } + default: + return status.Errorf(codes.Internal, "unknown message type: %T", msg) } - msg := &pb.Sync_Response{ - Resource: buf.Bytes(), - } - err := checkMessageSize(msg, rec) - if err != nil { - sc := rec.Schema() - tName, _ := sc.Metadata().GetValue(schema.MetadataTableName) - s.Logger.Warn().Str("table", tName). - Int("bytes", len(msg.String())). - Msg("Row exceeding max bytes ignored") - continue - } - if err := stream.Send(msg); err != nil { + // err := checkMessageSize(msg, rec) + // if err != nil { + // sc := rec.Schema() + // tName, _ := sc.Metadata().GetValue(schema.MetadataTableName) + // s.Logger.Warn().Str("table", tName). + // Int("bytes", len(msg.String())). + // Msg("Row exceeding max bytes ignored") + // continue + // } + if err := stream.Send(pbMsg); err != nil { return status.Errorf(codes.Internal, "failed to send resource: %v", err) } } @@ -142,105 +160,82 @@ func (s *Server) Sync(req *pb.Sync_Request, stream pb.Plugin_SyncServer) error { return syncErr } -func (s *Server) Migrate(ctx context.Context, req *pb.Migrate_Request) (*pb.Migrate_Response, error) { - schemas, err := schema.NewSchemasFromBytes(req.Tables) - if err != nil { - return nil, status.Errorf(codes.InvalidArgument, "failed to create schemas: %v", err) - } - tables, err := schema.NewTablesFromArrowSchemas(schemas) - if err != nil { - return nil, status.Errorf(codes.InvalidArgument, "failed to create tables: %v", err) - } - if req.PkMode == pb.PK_MODE_CQ_ID_ONLY { - setCQIDAsPrimaryKeysForTables(tables) - } - migrateMode := plugin.MigrateModeSafe - switch req.MigrateMode { - case pb.MIGRATE_MODE_SAFE: - migrateMode = plugin.MigrateModeSafe - case pb.MIGRATE_MODE_FORCE: - migrateMode = plugin.MigrateModeForce - } - return &pb.Migrate_Response{}, s.Plugin.Migrate(ctx, tables, migrateMode) -} - func (s *Server) Write(msg pb.Plugin_WriteServer) error { - resources := make(chan arrow.Record) + msgs := make(chan plugin.Message) - r, err := msg.Recv() - if err != nil { - if err == io.EOF { - return msg.SendAndClose(&pb.Write_Response{}) - } - return status.Errorf(codes.Internal, "failed to receive msg: %v", err) - } - - schemas, err := schema.NewSchemasFromBytes(r.Tables) - if err != nil { - return status.Errorf(codes.InvalidArgument, "failed to create schemas: %v", err) - } - tables, err := schema.NewTablesFromArrowSchemas(schemas) - if err != nil { - return status.Errorf(codes.InvalidArgument, "failed to create tables: %v", err) - } - if r.PkMode == pb.PK_MODE_CQ_ID_ONLY { - setCQIDAsPrimaryKeysForTables(tables) - } - sourceName := r.SourceName - syncTime := r.SyncTime.AsTime() - writeMode := plugin.WriteModeOverwrite - switch r.WriteMode { - case pb.WRITE_MODE_WRITE_MODE_APPEND: - writeMode = plugin.WriteModeAppend - case pb.WRITE_MODE_WRITE_MODE_OVERWRITE: - writeMode = plugin.WriteModeOverwrite - case pb.WRITE_MODE_WRITE_MODE_OVERWRITE_DELETE_STALE: - writeMode = plugin.WriteModeOverwriteDeleteStale - } eg, ctx := errgroup.WithContext(msg.Context()) eg.Go(func() error { - return s.Plugin.Write(ctx, sourceName, tables, syncTime, writeMode, resources) + return s.Plugin.Write(ctx, plugin.WriteOptions{}, msgs) }) for { r, err := msg.Recv() if err == io.EOF { - close(resources) + close(msgs) if err := eg.Wait(); err != nil { return status.Errorf(codes.Internal, "write failed: %v", err) } return msg.SendAndClose(&pb.Write_Response{}) } if err != nil { - close(resources) + close(msgs) if wgErr := eg.Wait(); wgErr != nil { return status.Errorf(codes.Internal, "failed to receive msg: %v and write failed: %v", err, wgErr) } return status.Errorf(codes.Internal, "failed to receive msg: %v", err) } - rdr, err := ipc.NewReader(bytes.NewReader(r.Resource)) - if err != nil { - close(resources) - if wgErr := eg.Wait(); wgErr != nil { - return status.Errorf(codes.InvalidArgument, "failed to create reader: %v and write failed: %v", err, wgErr) + var pluginMessage plugin.Message + var pbMsgConvertErr error + switch pbMsg := r.Message.(type) { + case *pb.Write_Request_CreateTable: + table, err := schema.NewTableFromBytes(pbMsg.CreateTable.Table) + if err != nil { + pbMsgConvertErr = status.Errorf(codes.InvalidArgument, "failed to create table: %v", err) + break + } + pluginMessage = &plugin.MessageCreateTable{ + Table: table, + MigrateForce: pbMsg.CreateTable.MigrateForce, + } + case *pb.Write_Request_Insert: + record, err := schema.NewRecordFromBytes(pbMsg.Insert.Record) + if err != nil { + pbMsgConvertErr = status.Errorf(codes.InvalidArgument, "failed to create record: %v", err) + break + } + pluginMessage = &plugin.MessageInsert{ + Record: record, + Upsert: pbMsg.Insert.Upsert, + } + case *pb.Write_Request_Delete: + table, err := schema.NewTableFromBytes(pbMsg.Delete.Table) + if err != nil { + pbMsgConvertErr = status.Errorf(codes.InvalidArgument, "failed to create record: %v", err) + break + } + pluginMessage = &plugin.MessageDeleteStale{ + Table: table, + SourceName: pbMsg.Delete.SourceName, + SyncTime: pbMsg.Delete.SyncTime.AsTime(), } - return status.Errorf(codes.InvalidArgument, "failed to create reader: %v", err) } - for rdr.Next() { - rec := rdr.Record() - rec.Retain() - select { - case resources <- rec: - case <-ctx.Done(): - close(resources) - if err := eg.Wait(); err != nil { - return status.Errorf(codes.Internal, "Context done: %v and failed to wait for plugin: %v", ctx.Err(), err) - } - return status.Errorf(codes.Internal, "Context done: %v", ctx.Err()) + + if pbMsgConvertErr != nil { + close(msgs) + if wgErr := eg.Wait(); wgErr != nil { + return status.Errorf(codes.Internal, "failed to convert message: %v and write failed: %v", pbMsgConvertErr, wgErr) } + return pbMsgConvertErr } - if err := rdr.Err(); err != nil { - return status.Errorf(codes.InvalidArgument, "failed to read resource: %v", err) + + select { + case msgs <- pluginMessage: + case <-ctx.Done(): + close(msgs) + if err := eg.Wait(); err != nil { + return status.Errorf(codes.Internal, "Context done: %v and failed to wait for plugin: %v", ctx.Err(), err) + } + return status.Errorf(codes.Internal, "Context done: %v", ctx.Err()) } } } @@ -263,15 +258,6 @@ func checkMessageSize(msg proto.Message, record arrow.Record) error { return nil } -func setCQIDAsPrimaryKeysForTables(tables schema.Tables) { - for _, table := range tables { - for i, col := range table.Columns { - table.Columns[i].PrimaryKey = col.Name == schema.CqIDColumn.Name - } - setCQIDAsPrimaryKeysForTables(table.Relations) - } -} - func (s *Server) Close(ctx context.Context, _ *pb.Close_Request) (*pb.Close_Response, error) { return &pb.Close_Response{}, s.Plugin.Close(ctx) } diff --git a/internal/servers/plugin/v3/state.go b/internal/servers/plugin/v3/state.go index be152297b7..81fd753a5c 100644 --- a/internal/servers/plugin/v3/state.go +++ b/internal/servers/plugin/v3/state.go @@ -23,14 +23,13 @@ const keyColumn = "key" const valueColumn = "value" type ClientV3 struct { - client pbPlugin.PluginClient - encodedTables [][]byte - mem map[string]string - keys []string - values []string + client pbPlugin.PluginClient + mem map[string]string + keys []string + values []string } -func newStateClient(ctx context.Context, conn *grpc.ClientConn, spec pbPlugin.StateBackendSpec) (state.Client, error) { +func newStateClient(ctx context.Context, conn *grpc.ClientConn, spec *pbPlugin.StateBackendSpec) (state.Client, error) { discoveryClient := pbDiscovery.NewDiscoveryClient(conn) versions, err := discoveryClient.GetVersions(ctx, &pbDiscovery.GetVersions_Request{}) if err != nil { @@ -61,8 +60,7 @@ func newStateClient(ctx context.Context, conn *grpc.ClientConn, spec pbPlugin.St }, }, } - tables := schema.Tables{table} - c.encodedTables, err = tables.ToArrowSchemas().Encode() + tableBytes, err := table.ToArrowSchemaBytes() if err != nil { return nil, err } @@ -73,9 +71,17 @@ func newStateClient(ctx context.Context, conn *grpc.ClientConn, spec pbPlugin.St return nil, err } - if _, err := c.client.Migrate(ctx, &pbPlugin.Migrate_Request{ - Tables: c.encodedTables, - MigrateMode: pbPlugin.MIGRATE_MODE_SAFE, + writeClient, err := c.client.Write(ctx) + if err != nil { + return nil, err + } + + if err := writeClient.Send(&pbPlugin.Write_Request{ + Message: &pbPlugin.Write_Request_CreateTable{ + CreateTable: &pbPlugin.MessageCreateTable{ + Table: tableBytes, + }, + }, }); err != nil { return nil, err } @@ -94,7 +100,11 @@ func newStateClient(ctx context.Context, conn *grpc.ClientConn, spec pbPlugin.St } return nil, err } - rdr, err := ipc.NewReader(bytes.NewReader(res.Resource)) + insertMessage := res.GetInsert() + if insertMessage == nil { + return nil, fmt.Errorf("unexpected message type %T", res) + } + rdr, err := ipc.NewReader(bytes.NewReader(insertMessage.Record)) if err != nil { return nil, err } @@ -141,12 +151,12 @@ func (c *ClientV3) flush(ctx context.Context) error { return err } if err := writeClient.Send(&pbPlugin.Write_Request{ - WriteMode: pbPlugin.WRITE_MODE_WRITE_MODE_OVERWRITE, - }); err != nil { - return err - } - if err := writeClient.Send(&pbPlugin.Write_Request{ - Resource: buf.Bytes(), + Message: &pbPlugin.Write_Request_Insert{ + Insert: &pbPlugin.MessageInsert{ + Record: buf.Bytes(), + Upsert: true, + }, + }, }); err != nil { return err } diff --git a/plugin/messages.go b/plugin/messages.go index fa975cc97c..43e3eedacb 100644 --- a/plugin/messages.go +++ b/plugin/messages.go @@ -1,83 +1,42 @@ package plugin import ( + "time" + "github.com/apache/arrow/go/v13/arrow" "github.com/cloudquery/plugin-sdk/v4/schema" ) -type MessageType int - -const ( - // Create table - MessageTypeCreate MessageType = iota - // Insert record - MessageTypeInsert - // Insert or update record - MessageTypeUpsert - // Delete rows - MessageTypeDelete -) - type MessageCreateTable struct { - Table *schema.Table - Force bool -} - -func (*MessageCreateTable) Type() MessageType { - return MessageTypeCreate + Table *schema.Table + MigrateForce bool } type MessageInsert struct { - Record arrow.Record - Columns []string - Upsert bool -} - -func (*MessageInsert) Type() MessageType { - return MessageTypeInsert -} - -type Operator int - -const ( - OperatorEqual Operator = iota - OperatorNotEqual - OperatorGreaterThan - OperatorGreaterThanOrEqual - OperatorLessThan - OperatorLessThanOrEqual -) - -type WhereClause struct { - Column string - Operator Operator - Value string -} - -type MessageDelete struct { Record arrow.Record - // currently delete only supports and where clause as we don't support - // full AST parsing - WhereClauses []WhereClause + Upsert bool } -func (*MessageDelete) Type() MessageType { - return MessageTypeDelete +// MessageDeleteStale is a pretty specific message which requires the destination to be aware of a CLI use-case +// thus it might be deprecated in the future +// in favour of MessageDelete or MessageRawQuery +// The message indeciates that the destination needs to run something like "DELETE FROM table WHERE _cq_source_name=$1 and sync_time < $2" +type MessageDeleteStale struct { + Table *schema.Table + SourceName string + SyncTime time.Time } -type Message interface { - Type() MessageType -} +type Message any type Messages []Message -func (m Messages) InsertItems() int64 { +func (messages Messages) InsertItems() int64 { items := int64(0) - for _, msg := range m { - switch msg.Type() { - case MessageTypeInsert: - msgInsert := msg.(*MessageInsert) - items += msgInsert.Record.NumRows() + for _, msg := range messages { + switch m := msg.(type) { + case *MessageInsert: + items += m.Record.NumRows() } } return items diff --git a/plugin/options.go b/plugin/options.go index 09a771d0b6..966f692e60 100644 --- a/plugin/options.go +++ b/plugin/options.go @@ -15,20 +15,4 @@ func (m MigrateMode) String() string { return migrateModeStrings[m] } -type WriteMode int - -const ( - WriteModeOverwriteDeleteStale WriteMode = iota - WriteModeOverwrite - WriteModeAppend -) - -var ( - writeModeStrings = []string{"overwrite-delete-stale", "overwrite", "append"} -) - -func (m WriteMode) String() string { - return writeModeStrings[m] -} - type Option func(*Plugin) diff --git a/plugin/plugin.go b/plugin/plugin.go index 9900d16e26..b583e86811 100644 --- a/plugin/plugin.go +++ b/plugin/plugin.go @@ -104,6 +104,9 @@ func (p *Plugin) SetLogger(logger zerolog.Logger) { } func (p *Plugin) Tables(ctx context.Context) (schema.Tables, error) { + if p.client == nil { + return nil, fmt.Errorf("plugin not initialized") + } tables, err := p.client.Tables(ctx) if err != nil { return nil, fmt.Errorf("failed to get tables: %w", err) @@ -132,5 +135,8 @@ func (p *Plugin) Close(ctx context.Context) error { return fmt.Errorf("plugin already in use") } defer p.mu.Unlock() + if p.client == nil { + return nil + } return p.client.Close(ctx) } diff --git a/plugin/plugin_reader.go b/plugin/plugin_reader.go index 89963d7eb4..4f9d51e66a 100644 --- a/plugin/plugin_reader.go +++ b/plugin/plugin_reader.go @@ -4,6 +4,7 @@ import ( "context" "fmt" + "github.com/cloudquery/plugin-sdk/v4/internal/glob" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/cloudquery/plugin-sdk/v4/state" "github.com/rs/zerolog" @@ -23,6 +24,20 @@ type ReadOnlyClient interface { Close(ctx context.Context) error } +func IsTable(name string, includeTablesPattern []string, skipTablesPattern []string) bool { + for _, pattern := range skipTablesPattern { + if glob.Glob(pattern, name) { + return false + } + } + for _, pattern := range includeTablesPattern { + if glob.Glob(pattern, name) { + return true + } + } + return false +} + type NewReadOnlyClientFunc func(context.Context, zerolog.Logger, any) (ReadOnlyClient, error) // NewReadOnlyPlugin returns a new CloudQuery Plugin with the given name, version and implementation. @@ -44,7 +59,7 @@ func NewReadOnlyPlugin(name string, version string, newClient NewReadOnlyClientF return NewPlugin(name, version, newClientWrapper, options...) } -func (p *Plugin) syncAll(ctx context.Context, options SyncOptions) (Messages, error) { +func (p *Plugin) SyncAll(ctx context.Context, options SyncOptions) (Messages, error) { var err error ch := make(chan Message) go func() { diff --git a/plugin/plugin_test.go b/plugin/plugin_test.go index d0016a113b..e33670b6d3 100644 --- a/plugin/plugin_test.go +++ b/plugin/plugin_test.go @@ -3,71 +3,72 @@ package plugin import ( "context" "testing" - "time" - "github.com/apache/arrow/go/v13/arrow/array" "github.com/cloudquery/plugin-sdk/v4/schema" + "github.com/rs/zerolog" ) -func TestPluginUnmanagedSync(t *testing.T) { - ctx := context.Background() - p := NewPlugin("test", "v0.0.0", NewMemDBClient) - testTable := schema.TestTable("test_table", schema.TestSourceOptions{}) - syncTime := time.Now().UTC() - sourceName := "test" - testRecords := schema.GenTestData(testTable, schema.GenTestDataOptions{ - SourceName: sourceName, - SyncTime: syncTime, - MaxRows: 1, - }) - if err := p.Init(ctx, nil); err != nil { - t.Fatal(err) - } +type testPluginSpec struct { +} - if err := p.Migrate(ctx, schema.Tables{testTable}, MigrateModeSafe); err != nil { - t.Fatal(err) +type testPluginClient struct { + messages []Message +} + +func newTestPluginClient(context.Context, zerolog.Logger, any) (Client, error) { + return &testPluginClient{}, nil +} + +func (c *testPluginClient) Tables(ctx context.Context) (schema.Tables, error) { + return schema.Tables{}, nil +} +func (c *testPluginClient) Sync(ctx context.Context, options SyncOptions, res chan<- Message) error { + for _, msg := range c.messages { + res <- msg + } + return nil +} +func (c *testPluginClient) Write(ctx context.Context, options WriteOptions, res <-chan Message) error { + for msg := range res { + c.messages = append(c.messages, msg) } - if err := p.writeAll(ctx, sourceName, syncTime, WriteModeOverwrite, testRecords); err != nil { + return nil +} +func (c *testPluginClient) Close(context.Context) error { + return nil +} + +func TestPluginSuccess(t *testing.T) { + ctx := context.Background() + p := NewPlugin("test", "v1.0.0", newTestPluginClient) + if err := p.Init(ctx, &testPluginSpec{}); err != nil { t.Fatal(err) } - gotRecords, err := p.syncAll(ctx, SyncOptions{ - Tables: []string{testTable.Name}, - }) + tables, err := p.Tables(ctx) if err != nil { t.Fatal(err) } - if len(gotRecords) != len(testRecords) { - t.Fatalf("got %d records, want %d", len(gotRecords), len(testRecords)) - } - if !array.RecordEqual(testRecords[0], gotRecords[0]) { - t.Fatal("records are not equal") + if len(tables) != 0 { + t.Fatal("expected 0 tables") } - records, err := p.syncAll(ctx, SyncOptions{ - Tables: []string{testTable.Name}, - }) - if err != nil { + if err := p.WriteAll(ctx, WriteOptions{}, nil); err != nil { t.Fatal(err) } - if len(records) != 1 { - t.Fatalf("got %d resources, want 1", len(records)) + if err := p.WriteAll(ctx, WriteOptions{}, []Message{ + MessageCreateTable{}, + }); err != nil { + t.Fatal(err) } - - if !array.RecordEqual(testRecords[0], records[0]) { - t.Fatal("records are not equal") + if len(p.client.(*testPluginClient).messages) != 1 { + t.Fatal("expected 1 message") } - newSyncTime := time.Now().UTC() - if err := p.DeleteStale(ctx, schema.Tables{testTable}, "test", newSyncTime); err != nil { - t.Fatal(err) - } - records, err = p.syncAll(ctx, SyncOptions{ - Tables: []string{testTable.Name}, - }) + messages, err := p.SyncAll(ctx, SyncOptions{}) if err != nil { t.Fatal(err) } - if len(records) != 0 { - t.Fatalf("got %d resources, want 0", len(records)) + if len(messages) != 1 { + t.Fatal("expected 1 message") } if err := p.Close(ctx); err != nil { diff --git a/plugin/plugin_writer.go b/plugin/plugin_writer.go index 009ac23ad7..5b5d64912a 100644 --- a/plugin/plugin_writer.go +++ b/plugin/plugin_writer.go @@ -10,11 +10,11 @@ type WriteOptions struct { // this function is currently used mostly for testing so it's not a public api func (p *Plugin) writeOne(ctx context.Context, options WriteOptions, resource Message) error { resources := []Message{resource} - return p.writeAll(ctx, options, resources) + return p.WriteAll(ctx, options, resources) } // this function is currently used mostly for testing so it's not a public api -func (p *Plugin) writeAll(ctx context.Context, options WriteOptions, resources []Message) error { +func (p *Plugin) WriteAll(ctx context.Context, options WriteOptions, resources []Message) error { ch := make(chan Message, len(resources)) for _, resource := range resources { ch <- resource diff --git a/plugin/testing_sync.go b/plugin/testing_sync.go.backup similarity index 100% rename from plugin/testing_sync.go rename to plugin/testing_sync.go.backup diff --git a/plugin/testing_upsert.go b/plugin/testing_upsert.go index f6b16f3ae3..55a1c0e82d 100644 --- a/plugin/testing_upsert.go +++ b/plugin/testing_upsert.go @@ -11,7 +11,7 @@ import ( "github.com/cloudquery/plugin-sdk/v4/schema" ) -func (s *PluginTestSuite) testUpsert(ctx context.Context) error { +func (s *WriterTestSuite) testUpsert(ctx context.Context) error { tableName := fmt.Sprintf("cq_test_upsert_%d", time.Now().Unix()) table := &schema.Table{ Name: tableName, @@ -27,15 +27,16 @@ func (s *PluginTestSuite) testUpsert(ctx context.Context) error { bldr := array.NewRecordBuilder(memory.DefaultAllocator, table.ToArrowSchema()) bldr.Field(0).(*array.StringBuilder).Append("foo") + record := bldr.NewRecord() if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageInsert{ - Record: bldr.NewRecord(), + Record: record, Upsert: true, }); err != nil { return fmt.Errorf("failed to insert record: %w", err) } - messages, err := s.plugin.syncAll(ctx, SyncOptions{ + messages, err := s.plugin.SyncAll(ctx, SyncOptions{ Tables: []string{tableName}, }) if err != nil { @@ -47,13 +48,13 @@ func (s *PluginTestSuite) testUpsert(ctx context.Context) error { } if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageInsert{ - Record: bldr.NewRecord(), + Record: record, Upsert: true, }); err != nil { return fmt.Errorf("failed to insert record: %w", err) } - messages, err = s.plugin.syncAll(ctx, SyncOptions{ + messages, err = s.plugin.SyncAll(ctx, SyncOptions{ Tables: []string{tableName}, }) if err != nil { diff --git a/plugin/testing_write.go b/plugin/testing_write.go index 5a358376af..03046d0c87 100644 --- a/plugin/testing_write.go +++ b/plugin/testing_write.go @@ -2,17 +2,13 @@ package plugin import ( "context" - "sort" - "strings" "testing" "github.com/apache/arrow/go/v13/arrow" - "github.com/apache/arrow/go/v13/arrow/array" "github.com/cloudquery/plugin-sdk/v4/schema" - "github.com/cloudquery/plugin-sdk/v4/types" ) -type PluginTestSuite struct { +type WriterTestSuite struct { tests PluginTestSuiteTests plugin *Plugin @@ -45,8 +41,8 @@ type PluginTestSuiteTests struct { // Usually when a destination is not supporting primary keys SkipUpsert bool - // SkipDelete skips testing MessageDelete events. - SkipDelete bool + // SkipDeleteStale skips testing MessageDelete events. + SkipDeleteStale bool // SkipAppend skips testing MessageInsert and Upsert=false. SkipInsert bool @@ -61,27 +57,27 @@ type PluginTestSuiteTests struct { type NewPluginFunc func() *Plugin -func WithTestSourceAllowNull(allowNull func(arrow.DataType) bool) func(o *PluginTestSuite) { - return func(o *PluginTestSuite) { +func WithTestSourceAllowNull(allowNull func(arrow.DataType) bool) func(o *WriterTestSuite) { + return func(o *WriterTestSuite) { o.allowNull = allowNull } } -func WithTestIgnoreNullsInLists() func(o *PluginTestSuite) { - return func(o *PluginTestSuite) { +func WithTestIgnoreNullsInLists() func(o *WriterTestSuite) { + return func(o *WriterTestSuite) { o.ignoreNullsInLists = true } } -func WithTestDataOptions(opts schema.TestSourceOptions) func(o *PluginTestSuite) { - return func(o *PluginTestSuite) { +func WithTestDataOptions(opts schema.TestSourceOptions) func(o *WriterTestSuite) { + return func(o *WriterTestSuite) { o.genDatOptions = opts } } -func PluginTestSuiteRunner(t *testing.T, p *Plugin, tests PluginTestSuiteTests, opts ...func(o *PluginTestSuite)) { +func TestWriterSuiteRunner(t *testing.T, p *Plugin, tests PluginTestSuiteTests, opts ...func(o *WriterTestSuite)) { t.Helper() - suite := &PluginTestSuite{ + suite := &WriterTestSuite{ tests: tests, plugin: p, } @@ -112,12 +108,12 @@ func PluginTestSuiteRunner(t *testing.T, p *Plugin, tests PluginTestSuiteTests, } }) - t.Run("TestDelete", func(t *testing.T) { + t.Run("TestDeleteStale", func(t *testing.T) { t.Helper() - if suite.tests.SkipDelete { + if suite.tests.SkipDeleteStale { t.Skip("skipping " + t.Name()) } - if err := suite.testDelete(ctx); err != nil { + if err := suite.testDeleteStale(ctx); err != nil { t.Fatal(err) } }) @@ -127,25 +123,7 @@ func PluginTestSuiteRunner(t *testing.T, p *Plugin, tests PluginTestSuiteTests, if suite.tests.SkipMigrate { t.Skip("skipping " + t.Name()) } - migrateMode := MigrateModeSafe - writeMode := WriteModeOverwrite - suite.destinationPluginTestMigrate(ctx, t, p, migrateMode, writeMode, tests.MigrateStrategyOverwrite, opts) - }) - -} - -func sortRecordsBySyncTime(table *schema.Table, records []arrow.Record) { - syncTimeIndex := table.Columns.Index(schema.CqSyncTimeColumn.Name) - cqIDIndex := table.Columns.Index(schema.CqIDColumn.Name) - sort.Slice(records, func(i, j int) bool { - // sort by sync time, then UUID - first := records[i].Column(syncTimeIndex).(*array.Timestamp).Value(0).ToTime(arrow.Millisecond) - second := records[j].Column(syncTimeIndex).(*array.Timestamp).Value(0).ToTime(arrow.Millisecond) - if first.Equal(second) { - firstUUID := records[i].Column(cqIDIndex).(*types.UUIDArray).Value(0).String() - secondUUID := records[j].Column(cqIDIndex).(*types.UUIDArray).Value(0).String() - return strings.Compare(firstUUID, secondUUID) < 0 - } - return first.Before(second) + suite.testMigrate(ctx, t, MigrateModeSafe) + suite.testMigrate(ctx, t, MigrateModeForce) }) } diff --git a/plugin/testing_write_delete.go b/plugin/testing_write_delete.go index bb4c44c2d8..ad569e5baf 100644 --- a/plugin/testing_write_delete.go +++ b/plugin/testing_write_delete.go @@ -5,21 +5,20 @@ import ( "fmt" "time" - "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/array" "github.com/apache/arrow/go/v13/arrow/memory" "github.com/cloudquery/plugin-sdk/v4/schema" // "github.com/cloudquery/plugin-sdk/v4/types" ) -func (s *PluginTestSuite) testDelete(ctx context.Context) error { +func (s *WriterTestSuite) testDeleteStale(ctx context.Context) error { tableName := fmt.Sprintf("cq_delete_%d", time.Now().Unix()) syncTime := time.Now().UTC().Round(1 * time.Second) table := &schema.Table{ Name: tableName, Columns: []schema.Column{ - {Name: "name", Type: arrow.BinaryTypes.String}, - {Name: "sync_time", Type: arrow.FixedWidthTypes.Timestamp_us}, + schema.CqSourceNameColumn, + schema.CqSyncTimeColumn, }, } if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageCreateTable{ @@ -31,16 +30,15 @@ func (s *PluginTestSuite) testDelete(ctx context.Context) error { bldr := array.NewRecordBuilder(memory.DefaultAllocator, table.ToArrowSchema()) bldr.Field(0).(*array.StringBuilder).Append("test") bldr.Field(1).(*array.TimestampBuilder).AppendTime(syncTime) - bldr.Field(0).(*array.StringBuilder).Append("test") - bldr.Field(1).(*array.TimestampBuilder).AppendTime(syncTime.Add(time.Second)) + record := bldr.NewRecord() if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageInsert{ - Record: bldr.NewRecord(), + Record: record, }); err != nil { return fmt.Errorf("failed to insert record: %w", err) } - messages, err := s.plugin.syncAll(ctx, SyncOptions{ + messages, err := s.plugin.SyncAll(ctx, SyncOptions{ Tables: []string{tableName}, }) if err != nil { @@ -48,27 +46,23 @@ func (s *PluginTestSuite) testDelete(ctx context.Context) error { } totalItems := messages.InsertItems() - if totalItems != 2 { - return fmt.Errorf("expected 2 items, got %d", totalItems) + if totalItems != 1 { + return fmt.Errorf("expected 1 items, got %d", totalItems) } bldr = array.NewRecordBuilder(memory.DefaultAllocator, table.ToArrowSchema()) bldr.Field(0).(*array.StringBuilder).Append("test") bldr.Field(1).(*array.TimestampBuilder).AppendTime(syncTime.Add(time.Second)) - if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageDelete{ - Record: bldr.NewRecord(), - WhereClauses: []WhereClause{ - { - Column: "name", - Operator: OperatorLessThan, - }, - }, + if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageDeleteStale{ + Table: table, + SourceName: "test", + SyncTime: syncTime, }); err != nil { return fmt.Errorf("failed to delete stale records: %w", err) } - messages, err = s.plugin.syncAll(ctx, SyncOptions{ + messages, err = s.plugin.SyncAll(ctx, SyncOptions{ Tables: []string{tableName}, }) if err != nil { diff --git a/plugin/testing_write_insert.go b/plugin/testing_write_insert.go index 4bc7f66c86..36004ac173 100644 --- a/plugin/testing_write_insert.go +++ b/plugin/testing_write_insert.go @@ -11,7 +11,7 @@ import ( "github.com/cloudquery/plugin-sdk/v4/schema" ) -func (s *PluginTestSuite) testInsert(ctx context.Context) error { +func (s *WriterTestSuite) testInsert(ctx context.Context) error { tableName := fmt.Sprintf("cq_test_insert_%d", time.Now().Unix()) table := &schema.Table{ Name: tableName, @@ -27,15 +27,16 @@ func (s *PluginTestSuite) testInsert(ctx context.Context) error { bldr := array.NewRecordBuilder(memory.DefaultAllocator, table.ToArrowSchema()) bldr.Field(0).(*array.StringBuilder).Append("foo") + record := bldr.NewRecord() if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageInsert{ - Record: bldr.NewRecord(), - Upsert: true, + Record: record, + Upsert: false, }); err != nil { return fmt.Errorf("failed to insert record: %w", err) } - messages, err := s.plugin.syncAll(ctx, SyncOptions{ + messages, err := s.plugin.SyncAll(ctx, SyncOptions{ Tables: []string{tableName}, }) if err != nil { @@ -47,12 +48,12 @@ func (s *PluginTestSuite) testInsert(ctx context.Context) error { } if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageInsert{ - Record: bldr.NewRecord(), + Record: record, }); err != nil { return fmt.Errorf("failed to insert record: %w", err) } - messages, err = s.plugin.syncAll(ctx, SyncOptions{ + messages, err = s.plugin.SyncAll(ctx, SyncOptions{ Tables: []string{tableName}, }) if err != nil { diff --git a/plugin/testing_write_migrate.go b/plugin/testing_write_migrate.go index 78468a817e..13c1fede30 100644 --- a/plugin/testing_write_migrate.go +++ b/plugin/testing_write_migrate.go @@ -17,7 +17,7 @@ func tableUUIDSuffix() string { return strings.ReplaceAll(uuid.NewString(), "-", "_") } -func (s *PluginTestSuite) migrate(ctx context.Context, target *schema.Table, source *schema.Table, strategy MigrateMode, mode MigrateMode) error { +func (s *WriterTestSuite) migrate(ctx context.Context, target *schema.Table, source *schema.Table, strategy MigrateMode, mode MigrateMode) error { if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageCreateTable{ Table: source, }); err != nil { @@ -41,7 +41,7 @@ func (s *PluginTestSuite) migrate(ctx context.Context, target *schema.Table, sou return fmt.Errorf("failed to insert record: %w", err) } - messages, err := s.plugin.syncAll(ctx, SyncOptions{ + messages, err := s.plugin.SyncAll(ctx, SyncOptions{ Tables: []string{source.Name}, }) if err != nil { @@ -53,8 +53,8 @@ func (s *PluginTestSuite) migrate(ctx context.Context, target *schema.Table, sou } if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageCreateTable{ - Table: target, - Force: strategy == MigrateModeForce, + Table: target, + MigrateForce: strategy == MigrateModeForce, }); err != nil { return fmt.Errorf("failed to create table: %w", err) } @@ -65,7 +65,7 @@ func (s *PluginTestSuite) migrate(ctx context.Context, target *schema.Table, sou return fmt.Errorf("failed to insert record: %w", err) } - messages, err = s.plugin.syncAll(ctx, SyncOptions{ + messages, err = s.plugin.SyncAll(ctx, SyncOptions{ Tables: []string{source.Name}, }) if err != nil { @@ -86,7 +86,7 @@ func (s *PluginTestSuite) migrate(ctx context.Context, target *schema.Table, sou return nil } -func (s *PluginTestSuite) testMigrate( +func (s *WriterTestSuite) testMigrate( ctx context.Context, t *testing.T, mode MigrateMode, diff --git a/plugin/testing_write_upsert.go b/plugin/testing_write_upsert.go deleted file mode 100644 index 4ee1ba9db7..0000000000 --- a/plugin/testing_write_upsert.go +++ /dev/null @@ -1,69 +0,0 @@ -package plugin - -import ( - "context" - "fmt" - "time" - - "github.com/apache/arrow/go/v13/arrow" - "github.com/apache/arrow/go/v13/arrow/array" - "github.com/apache/arrow/go/v13/arrow/memory" - "github.com/cloudquery/plugin-sdk/v4/schema" -) - -func (s *PluginTestSuite) destinationPluginTestWriteOverwrite(ctx context.Context, p *Plugin) error { - tableName := fmt.Sprintf("cq_test_upsert_%d", time.Now().Unix()) - table := &schema.Table{ - Name: tableName, - Columns: []schema.Column{ - {Name: "name", Type: arrow.BinaryTypes.String, PrimaryKey: true}, - }, - } - if err := p.writeOne(ctx, WriteOptions{}, &MessageCreateTable{ - Table: table, - }); err != nil { - return fmt.Errorf("failed to create table: %w", err) - } - - bldr := array.NewRecordBuilder(memory.DefaultAllocator, table.ToArrowSchema()) - bldr.Field(0).(*array.StringBuilder).Append("foo") - - if err := p.writeOne(ctx, WriteOptions{}, &MessageInsert{ - Record: bldr.NewRecord(), - Upsert: true, - }); err != nil { - return fmt.Errorf("failed to insert record: %w", err) - } - - messages, err := p.syncAll(ctx, SyncOptions{ - Tables: []string{tableName}, - }) - if err != nil { - return fmt.Errorf("failed to sync: %w", err) - } - totalItems := messages.InsertItems() - if totalItems != 1 { - return fmt.Errorf("expected 1 item, got %d", totalItems) - } - - if err := p.writeOne(ctx, WriteOptions{}, &MessageInsert{ - Record: bldr.NewRecord(), - Upsert: true, - }); err != nil { - return fmt.Errorf("failed to insert record: %w", err) - } - - messages, err = p.syncAll(ctx, SyncOptions{ - Tables: []string{tableName}, - }) - if err != nil { - return fmt.Errorf("failed to sync: %w", err) - } - - totalItems = messages.InsertItems() - if totalItems != 1 { - return fmt.Errorf("expected 1 item, got %d", totalItems) - } - - return nil -} diff --git a/scheduler/scheduler.go b/scheduler/scheduler.go index 08d8c86166..66f56845d7 100644 --- a/scheduler/scheduler.go +++ b/scheduler/scheduler.go @@ -27,33 +27,33 @@ const ( defaultConcurrency = 200000 ) -type SchedulerStrategy int +type Strategy int const ( - SchedulerDFS SchedulerStrategy = iota - SchedulerRoundRobin + StrategyDFS Strategy = iota + StrategyRoundRobin ) -var AllSchedulers = Schedulers{SchedulerDFS, SchedulerRoundRobin} +var AllSchedulers = Strategies{StrategyDFS, StrategyRoundRobin} var AllSchedulerNames = [...]string{ - SchedulerDFS: "dfs", - SchedulerRoundRobin: "round-robin", + StrategyDFS: "dfs", + StrategyRoundRobin: "round-robin", } -type Schedulers []SchedulerStrategy +type Strategies []Strategy -func (s Schedulers) String() string { +func (s Strategies) String() string { var buffer bytes.Buffer - for i, scheduler := range s { + for i, strategy := range s { if i > 0 { buffer.WriteString(", ") } - buffer.WriteString(scheduler.String()) + buffer.WriteString(strategy.String()) } return buffer.String() } -func (s SchedulerStrategy) String() string { +func (s Strategy) String() string { return AllSchedulerNames[s] } @@ -77,7 +77,7 @@ func WithConcurrency(concurrency uint64) Option { } } -func WithSchedulerStrategy(strategy SchedulerStrategy) Option { +func WithSchedulerStrategy(strategy Strategy) Option { return func(s *Scheduler) { s.strategy = strategy } @@ -87,7 +87,7 @@ type Scheduler struct { tables schema.Tables client schema.ClientMeta caser *caser.Caser - strategy SchedulerStrategy + strategy Strategy // status sync metrics metrics *Metrics maxDepth uint64 @@ -124,9 +124,9 @@ func (s *Scheduler) Sync(ctx context.Context, res chan<- arrow.Record) error { go func() { defer close(resources) switch s.strategy { - case SchedulerDFS: + case StrategyDFS: s.syncDfs(ctx, resources) - case SchedulerRoundRobin: + case StrategyRoundRobin: s.syncRoundRobin(ctx, resources) default: panic(fmt.Errorf("unknown scheduler %s", s.strategy)) @@ -142,24 +142,24 @@ func (s *Scheduler) Sync(ctx context.Context, res chan<- arrow.Record) error { return nil } -func (p *Scheduler) logTablesMetrics(tables schema.Tables, client schema.ClientMeta) { +func (s *Scheduler) logTablesMetrics(tables schema.Tables, client schema.ClientMeta) { clientName := client.ID() for _, table := range tables { - metrics := p.metrics.TableClient[table.Name][clientName] - p.logger.Info().Str("table", table.Name).Str("client", clientName).Uint64("resources", metrics.Resources).Uint64("errors", metrics.Errors).Msg("table sync finished") - p.logTablesMetrics(table.Relations, client) + metrics := s.metrics.TableClient[table.Name][clientName] + s.logger.Info().Str("table", table.Name).Str("client", clientName).Uint64("resources", metrics.Resources).Uint64("errors", metrics.Errors).Msg("table sync finished") + s.logTablesMetrics(table.Relations, client) } } -func (p *Scheduler) resolveResource(ctx context.Context, table *schema.Table, client schema.ClientMeta, parent *schema.Resource, item any) *schema.Resource { +func (s *Scheduler) resolveResource(ctx context.Context, table *schema.Table, client schema.ClientMeta, parent *schema.Resource, item any) *schema.Resource { var validationErr *schema.ValidationError ctx, cancel := context.WithTimeout(ctx, 10*time.Minute) defer cancel() resource := schema.NewResourceData(table, parent, item) objectStartTime := time.Now() clientID := client.ID() - tableMetrics := p.metrics.TableClient[table.Name][clientID] - logger := p.logger.With().Str("table", table.Name).Str("client", clientID).Logger() + tableMetrics := s.metrics.TableClient[table.Name][clientID] + logger := s.logger.With().Str("table", table.Name).Str("client", clientID).Logger() defer func() { if err := recover(); err != nil { stack := fmt.Sprintf("%s\n%s", err, string(debug.Stack())) @@ -186,7 +186,7 @@ func (p *Scheduler) resolveResource(ctx context.Context, table *schema.Table, cl } for _, c := range table.Columns { - p.resolveColumn(ctx, logger, tableMetrics, client, resource, c) + s.resolveColumn(ctx, logger, tableMetrics, client, resource, c) } if table.PostResourceResolver != nil { @@ -205,7 +205,7 @@ func (p *Scheduler) resolveResource(ctx context.Context, table *schema.Table, cl return resource } -func (p *Scheduler) resolveColumn(ctx context.Context, logger zerolog.Logger, tableMetrics *TableClientMetrics, client schema.ClientMeta, resource *schema.Resource, c schema.Column) { +func (s *Scheduler) resolveColumn(ctx context.Context, logger zerolog.Logger, tableMetrics *TableClientMetrics, client schema.ClientMeta, resource *schema.Resource, c schema.Column) { var validationErr *schema.ValidationError columnStartTime := time.Now() defer func() { @@ -235,7 +235,7 @@ func (p *Scheduler) resolveColumn(ctx context.Context, logger zerolog.Logger, ta } } else { // base use case: try to get column with CamelCase name - v := funk.Get(resource.GetItem(), p.caser.ToPascal(c.Name), funk.WithAllowZero()) + v := funk.Get(resource.GetItem(), s.caser.ToPascal(c.Name), funk.WithAllowZero()) if v != nil { err := resource.Set(c.Name, v) if err != nil { diff --git a/scheduler/scheduler_dfs.go b/scheduler/scheduler_dfs.go index f0d465684f..86f2874ec6 100644 --- a/scheduler/scheduler_dfs.go +++ b/scheduler/scheduler_dfs.go @@ -17,7 +17,7 @@ import ( func (s *Scheduler) syncDfs(ctx context.Context, resolvedResources chan<- *schema.Resource) { // This is very similar to the concurrent web crawler problem with some minor changes. // We are using DFS to make sure memory usage is capped at O(h) where h is the height of the tree. - tableConcurrency := max(uint64(s.concurrency/minResourceConcurrency), minTableConcurrency) + tableConcurrency := max(s.concurrency/minResourceConcurrency, minTableConcurrency) resourceConcurrency := tableConcurrency * minResourceConcurrency s.tableSems = make([]*semaphore.Weighted, s.maxDepth) diff --git a/scheduler/scheduler_round_robin.go b/scheduler/scheduler_round_robin.go index 43bd337862..f800caebc6 100644 --- a/scheduler/scheduler_round_robin.go +++ b/scheduler/scheduler_round_robin.go @@ -14,7 +14,7 @@ type tableClient struct { } func (s *Scheduler) syncRoundRobin(ctx context.Context, resolvedResources chan<- *schema.Resource) { - tableConcurrency := max(uint64(s.concurrency/minResourceConcurrency), minTableConcurrency) + tableConcurrency := max(s.concurrency/minResourceConcurrency, minTableConcurrency) resourceConcurrency := tableConcurrency * minResourceConcurrency s.tableSems = make([]*semaphore.Weighted, s.maxDepth) diff --git a/scheduler/scheduler_test.go b/scheduler/scheduler_test.go index 37bd9fea56..6eb6f3db01 100644 --- a/scheduler/scheduler_test.go +++ b/scheduler/scheduler_test.go @@ -3,30 +3,23 @@ package scheduler import ( "context" "testing" - "time" "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/array" "github.com/cloudquery/plugin-sdk/v4/scalar" "github.com/cloudquery/plugin-sdk/v4/schema" - "github.com/google/uuid" "github.com/rs/zerolog" ) type testExecutionClient struct { } -func (t *testExecutionClient) ID() string { +func (*testExecutionClient) ID() string { return "test" } var _ schema.ClientMeta = &testExecutionClient{} -var deterministicStableUUID = uuid.MustParse("c25355aab52c5b70a4e0c9991f5a3b87") -var randomStableUUID = uuid.MustParse("00000000000040008000000000000000") - -var testSyncTime = time.Now() - func testResolverSuccess(_ context.Context, _ schema.ClientMeta, _ *schema.Resource, res chan<- any) error { res <- map[string]any{ "TestColumn": 3, @@ -173,10 +166,6 @@ var syncTestCases = []syncTestCase{ table: testTableSuccess(), data: []scalar.Vector{ { - // &scalar.String{Value: "testSource", Valid: true}, - // &scalar.Timestamp{Value: testSyncTime, Valid: true}, - // &scalar.UUID{Value: randomStableUUID, Valid: true}, - // &scalar.UUID{}, &scalar.Int64{Value: 3, Valid: true}, }, }, @@ -196,17 +185,9 @@ var syncTestCases = []syncTestCase{ table: testTableRelationSuccess(), data: []scalar.Vector{ { - // &scalar.String{Value: "testSource", Valid: true}, - // &scalar.Timestamp{Value: testSyncTime, Valid: true}, - // &scalar.UUID{Value: randomStableUUID, Valid: true}, - // &scalar.UUID{}, &scalar.Int64{Value: 3, Valid: true}, }, { - // &scalar.String{Value: "testSource", Valid: true}, - // &scalar.Timestamp{Value: testSyncTime, Valid: true}, - // &scalar.UUID{Value: randomStableUUID, Valid: true}, - // &scalar.UUID{Value: randomStableUUID, Valid: true}, &scalar.Int64{Value: 3, Valid: true}, }, }, @@ -216,10 +197,6 @@ var syncTestCases = []syncTestCase{ table: testTableSuccessWithPK(), data: []scalar.Vector{ { - // &scalar.String{Value: "testSource", Valid: true}, - // &scalar.Timestamp{Value: testSyncTime, Valid: true}, - // &scalar.UUID{Value: deterministicStableUUID, Valid: true}, - // &scalar.UUID{}, &scalar.Int64{Value: 3, Valid: true}, }, }, @@ -240,7 +217,7 @@ func TestScheduler(t *testing.T) { } } -func testSyncTable(t *testing.T, tc syncTestCase, strategy SchedulerStrategy, deterministicCQID bool) { +func testSyncTable(t *testing.T, tc syncTestCase, strategy Strategy, deterministicCQID bool) { ctx := context.Background() tables := []*schema.Table{ tc.table, @@ -249,7 +226,7 @@ func testSyncTable(t *testing.T, tc syncTestCase, strategy SchedulerStrategy, de opts := []Option{ WithLogger(zerolog.New(zerolog.NewTestWriter(t))), WithSchedulerStrategy(strategy), - // WithDeterministicCQId(deterministicCQID), + WithDeterministicCQId(deterministicCQID), } sc := NewScheduler(tables, &c, opts...) records := make(chan arrow.Record, 10) diff --git a/schema/arrow.go b/schema/arrow.go index f7f61dbe61..4baa2a4b86 100644 --- a/schema/arrow.go +++ b/schema/arrow.go @@ -52,14 +52,47 @@ func (s Schemas) Encode() ([][]byte, error) { return ret, nil } +func RecordToBytes(record arrow.Record) ([]byte, error) { + var buf bytes.Buffer + wr := ipc.NewWriter(&buf, ipc.WithSchema(record.Schema())) + if err := wr.Write(record); err != nil { + return nil, err + } + if err := wr.Close(); err != nil { + return nil, err + } + return buf.Bytes(), nil +} + +func NewRecordFromBytes(b []byte) (arrow.Record, error) { + rdr, err := ipc.NewReader(bytes.NewReader(b)) + if err != nil { + return nil, err + } + for rdr.Next() { + rec := rdr.Record() + rec.Retain() + return rec, nil + } + return nil, nil +} + +func NewSchemaFromBytes(b []byte) (*arrow.Schema, error) { + rdr, err := ipc.NewReader(bytes.NewReader(b)) + if err != nil { + return nil, err + } + return rdr.Schema(), nil +} + func NewSchemasFromBytes(b [][]byte) (Schemas, error) { + var err error ret := make([]*arrow.Schema, len(b)) for i, buf := range b { - rdr, err := ipc.NewReader(bytes.NewReader(buf)) + ret[i], err = NewSchemaFromBytes(buf) if err != nil { return nil, err } - ret[i] = rdr.Schema() } return ret, nil } diff --git a/schema/table.go b/schema/table.go index 4475170104..9e84f637b1 100644 --- a/schema/table.go +++ b/schema/table.go @@ -1,11 +1,13 @@ package schema import ( + "bytes" "context" "fmt" "regexp" "github.com/apache/arrow/go/v13/arrow" + "github.com/apache/arrow/go/v13/arrow/ipc" "github.com/cloudquery/plugin-sdk/v4/internal/glob" "golang.org/x/exp/slices" ) @@ -106,6 +108,14 @@ func NewTablesFromArrowSchemas(schemas []*arrow.Schema) (Tables, error) { return tables, nil } +func NewTableFromBytes(b []byte) (*Table, error) { + sc, err := NewSchemaFromBytes(b) + if err != nil { + return nil, err + } + return NewTableFromArrowSchema(sc) +} + // Create a CloudQuery Table abstraction from an arrow schema // arrow schema is a low level representation of a table that can be sent // over the wire in a cross-language way @@ -365,6 +375,15 @@ func (t *Table) PrimaryKeysIndexes() []int { return primaryKeys } +func (t *Table) ToArrowSchemaBytes() ([]byte, error) { + sc := t.ToArrowSchema() + var buf bytes.Buffer + wr := ipc.NewWriter(&buf, ipc.WithSchema(sc)) + if err := wr.Close(); err != nil { + return nil, err + } + return buf.Bytes(), nil +} func (t *Table) ToArrowSchema() *arrow.Schema { fields := make([]arrow.Field, len(t.Columns)) diff --git a/serve/destination_v0_test.go b/serve/destination_v0_test.go index 181474c3c4..ff0ad377d3 100644 --- a/serve/destination_v0_test.go +++ b/serve/destination_v0_test.go @@ -7,7 +7,6 @@ import ( "testing" "time" - "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/array" "github.com/apache/arrow/go/v13/arrow/memory" pbBase "github.com/cloudquery/plugin-pb-go/pb/base/v0" @@ -16,6 +15,7 @@ import ( schemav2 "github.com/cloudquery/plugin-sdk/v2/schema" "github.com/cloudquery/plugin-sdk/v2/testdata" "github.com/cloudquery/plugin-sdk/v4/internal/deprecated" + "github.com/cloudquery/plugin-sdk/v4/internal/memdb" serversDestination "github.com/cloudquery/plugin-sdk/v4/internal/servers/destination/v0" "github.com/cloudquery/plugin-sdk/v4/plugin" "google.golang.org/grpc" @@ -24,7 +24,7 @@ import ( ) func TestDestination(t *testing.T) { - p := plugin.NewPlugin("testDestinationPlugin", "development", plugin.NewMemDBClient) + p := plugin.NewPlugin("testDestinationPlugin", "development", memdb.NewMemDBClient) srv := Plugin(p, WithArgs("serve"), WithDestinationV0V1Server(), WithTestListener()) ctx := context.Background() ctx, cancel := context.WithCancel(ctx) @@ -129,20 +129,20 @@ func TestDestination(t *testing.T) { } // serversDestination table := serversDestination.TableV2ToV3(tableV2) - readCh := make(chan arrow.Record, 1) - if err := p.Sync(ctx, plugin.SyncOptions{ + msgs, err := p.SyncAll(ctx, plugin.SyncOptions{ Tables: []string{tableName}, - }, readCh); err != nil { + }) + if err != nil { t.Fatal(err) } - close(readCh) totalResources := 0 destRecord := serversDestination.CQTypesOneToRecord(memory.DefaultAllocator, destResource.Data, table.ToArrowSchema()) - for resource := range readCh { + for _, msg := range msgs { totalResources++ - if !array.RecordEqual(destRecord, resource) { + m := msg.(*plugin.MessageInsert) + if !array.RecordEqual(destRecord, m.Record) { // diff := destination.RecordDiff(destRecord, resource) - t.Fatalf("expected %v but got %v", destRecord, resource) + t.Fatalf("expected %v but got %v", destRecord, m.Record) } } if totalResources != 1 { diff --git a/serve/destination_v1_test.go b/serve/destination_v1_test.go index 3f15930022..abc789ff2d 100644 --- a/serve/destination_v1_test.go +++ b/serve/destination_v1_test.go @@ -8,11 +8,11 @@ import ( "testing" "time" - "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/array" "github.com/apache/arrow/go/v13/arrow/ipc" pb "github.com/cloudquery/plugin-pb-go/pb/destination/v1" "github.com/cloudquery/plugin-pb-go/specs/v0" + "github.com/cloudquery/plugin-sdk/v4/internal/memdb" "github.com/cloudquery/plugin-sdk/v4/plugin" "github.com/cloudquery/plugin-sdk/v4/schema" "google.golang.org/grpc" @@ -21,7 +21,7 @@ import ( ) func TestDestinationV1(t *testing.T) { - p := plugin.NewPlugin("testDestinationPlugin", "development", plugin.NewMemDBClient) + p := plugin.NewPlugin("testDestinationPlugin", "development", memdb.NewMemDBClient) srv := Plugin(p, WithArgs("serve"), WithDestinationV0V1Server(), WithTestListener()) ctx := context.Background() ctx, cancel := context.WithCancel(ctx) @@ -129,19 +129,20 @@ func TestDestinationV1(t *testing.T) { t.Fatal(err) } // serversDestination - readCh := make(chan arrow.Record, 1) - if err := p.Sync(ctx, plugin.SyncOptions{ + msgs, err := p.SyncAll(ctx, plugin.SyncOptions{ Tables: []string{tableName}, - }, readCh); err != nil { + }) + if err != nil { t.Fatal(err) } - close(readCh) totalResources := 0 - for resource := range readCh { + for _, msg := range msgs { totalResources++ - if !array.RecordEqual(rec, resource) { - diff := plugin.RecordDiff(rec, resource) - t.Fatalf("diff at %d: %s", totalResources, diff) + m := msg.(*plugin.MessageInsert) + if !array.RecordEqual(rec, m.Record) { + // diff := plugin.RecordDiff(rec, resource) + // t.Fatalf("diff at %d: %s", totalResources, diff) + t.Fatalf("expected %v but got %v", rec, m.Record) } } if totalResources != 1 { diff --git a/serve/plugin_test.go b/serve/plugin_test.go index d5357d1cb5..fb49d3f55e 100644 --- a/serve/plugin_test.go +++ b/serve/plugin_test.go @@ -8,7 +8,9 @@ import ( "testing" "github.com/apache/arrow/go/v13/arrow" + "github.com/apache/arrow/go/v13/arrow/array" "github.com/apache/arrow/go/v13/arrow/ipc" + "github.com/apache/arrow/go/v13/arrow/memory" pb "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" "github.com/cloudquery/plugin-sdk/v4/internal/memdb" "github.com/cloudquery/plugin-sdk/v4/plugin" @@ -60,6 +62,10 @@ func TestPluginServe(t *testing.T) { t.Fatalf("Expected version to be v1.0.0 but got %s", getVersionResponse.Version) } + if _, err := c.Init(ctx, &pb.Init_Request{}); err != nil { + t.Fatal(err) + } + getTablesRes, err := c.GetTables(ctx, &pb.GetTables_Request{}) if err != nil { t.Fatal(err) @@ -70,14 +76,58 @@ func TestPluginServe(t *testing.T) { t.Fatal(err) } - if len(tables) != 2 { - t.Fatalf("Expected 2 tables but got %d", len(tables)) + if len(tables) != 0 { + t.Fatalf("Expected 0 tables but got %d", len(tables)) } - if _, err := c.Init(ctx, &pb.Init_Request{}); err != nil { + testTable := schema.Table{ + Name: "test_table", + Columns: []schema.Column{ + { + Name: "col1", + Type: arrow.BinaryTypes.String, + }, + }, + } + bldr := array.NewRecordBuilder(memory.DefaultAllocator, testTable.ToArrowSchema()) + bldr.Field(0).(*array.StringBuilder).Append("test") + record := bldr.NewRecord() + recordBytes, err := schema.RecordToBytes(record) + if err != nil { + t.Fatal(err) + } + tableBytes, err := testTable.ToArrowSchemaBytes() + if err != nil { + t.Fatal(err) + } + writeClient, err := c.Write(ctx) + if err != nil { + t.Fatal(err) + } + if err := writeClient.Send(&pb.Write_Request{ + Message: &pb.Write_Request_CreateTable{ + CreateTable: &pb.MessageCreateTable{ + Table: tableBytes, + }, + }, + }); err != nil { + t.Fatal(err) + } + if err := writeClient.Send(&pb.Write_Request{ + Message: &pb.Write_Request_Insert{ + Insert: &pb.MessageInsert{ + Record: recordBytes, + }, + }, + }); err != nil { + t.Fatal(err) + } + if _, err := writeClient.CloseAndRecv(); err != nil { t.Fatal(err) } - syncClient, err := c.Sync(ctx, &pb.Sync_Request{}) + syncClient, err := c.Sync(ctx, &pb.Sync_Request{ + Tables: []string{"test_table"}, + }) if err != nil { t.Fatal(err) } @@ -90,7 +140,8 @@ func TestPluginServe(t *testing.T) { if err != nil { t.Fatal(err) } - rdr, err := ipc.NewReader(bytes.NewReader(r.Resource)) + m := r.Message.(*pb.Sync_Response_Insert) + rdr, err := ipc.NewReader(bytes.NewReader(m.Insert.Record)) if err != nil { t.Fatal(err) } @@ -111,8 +162,8 @@ func TestPluginServe(t *testing.T) { if tableName != "test_table" { t.Fatalf("Expected resource with table name test_table. got: %s", tableName) } - if len(resource.Columns()) != 5 { - t.Fatalf("Expected resource with data length 3 but got %d", len(resource.Columns())) + if len(resource.Columns()) != 1 { + t.Fatalf("Expected resource with data length 1 but got %d", len(resource.Columns())) } totalResources++ } diff --git a/transformers/tables.go b/transformers/tables.go index 9ffbc3dd1f..f8e7c5b46f 100644 --- a/transformers/tables.go +++ b/transformers/tables.go @@ -7,10 +7,10 @@ import ( ) // Set parent links on relational tables -func setParents(tables schema.Tables, parent *schema.Table) { +func SetParents(tables schema.Tables, parent *schema.Table) { for _, table := range tables { table.Parent = parent - setParents(table.Relations, table) + SetParents(table.Relations, table) } } diff --git a/writers/batch_test.go b/writers/batch_test.go index 0ca94fc1ad..dcc38f64aa 100644 --- a/writers/batch_test.go +++ b/writers/batch_test.go @@ -13,7 +13,7 @@ import ( type testBatchClient struct { } -func (c *testBatchClient) WriteTableBatch(ctx context.Context, table *schema.Table, resources []arrow.Record) error { +func (c *testBatchClient) WriteTableBatch(context.Context, *schema.Table, []arrow.Record) error { return nil } From da742a71a68f87a1f079b7358e5b00c74bb06ad1 Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Mon, 12 Jun 2023 23:20:08 +0300 Subject: [PATCH 017/125] fix --- plugin/plugin_reader.go | 1 - serve/state_v3_test.go.backup | 57 ----------------------------------- 2 files changed, 58 deletions(-) delete mode 100644 serve/state_v3_test.go.backup diff --git a/plugin/plugin_reader.go b/plugin/plugin_reader.go index 4f9d51e66a..0544af3738 100644 --- a/plugin/plugin_reader.go +++ b/plugin/plugin_reader.go @@ -80,7 +80,6 @@ func (p *Plugin) Sync(ctx context.Context, options SyncOptions, res chan<- Messa return fmt.Errorf("plugin already in use") } defer p.mu.Unlock() - // p.syncTime = options.SyncTime // startTime := time.Now() if err := p.client.Sync(ctx, options, res); err != nil { diff --git a/serve/state_v3_test.go.backup b/serve/state_v3_test.go.backup deleted file mode 100644 index f75d53353b..0000000000 --- a/serve/state_v3_test.go.backup +++ /dev/null @@ -1,57 +0,0 @@ -package serve - -import ( - "context" - "sync" - "testing" - - "github.com/cloudquery/plugin-sdk/v4/internal/state" - "github.com/cloudquery/plugin-sdk/v4/plugin" - "google.golang.org/grpc" - "google.golang.org/grpc/credentials/insecure" -) - -func TestStateV3(t *testing.T) { - p := plugin.NewPlugin("memdb", "v1.0.0", plugin.NewMemDBClient) - srv := Plugin(p, WithArgs("serve"), WithTestListener()) - ctx := context.Background() - ctx, cancel := context.WithCancel(ctx) - var wg sync.WaitGroup - wg.Add(1) - var serverErr error - go func() { - defer wg.Done() - serverErr = srv.Serve(ctx) - }() - defer func() { - cancel() - wg.Wait() - }() - - // https://stackoverflow.com/questions/42102496/testing-a-grpc-service - conn, err := grpc.DialContext(ctx, "bufnet", grpc.WithContextDialer(srv.bufPluginDialer), grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithBlock()) - if err != nil { - t.Fatalf("Failed to dial bufnet: %v", err) - } - - stateClient, err := state.NewClient(ctx, "test", conn) - if err != nil { - t.Fatalf("Failed to create state client: %v", err) - } - if err := stateClient.SetKey(ctx, "testKey", "testValue"); err != nil { - t.Fatalf("Failed to set key: %v", err) - } - key, err := stateClient.GetKey(ctx, "testKey") - if err != nil { - t.Fatalf("Failed to get key: %v", err) - } - if key != "testValue" { - t.Fatalf("Unexpected key value: %v", key) - } - - cancel() - wg.Wait() - if serverErr != nil { - t.Fatal(serverErr) - } -} From e6b8c4117ddf9ff90317852b78852582cecf1fcb Mon Sep 17 00:00:00 2001 From: Herman Schaaf Date: Tue, 13 Jun 2023 17:05:37 +0100 Subject: [PATCH 018/125] Add MixedBatchWriter (WIP) --- writers/batch.go | 5 +++ writers/mixed_batch.go | 80 +++++++++++++++++++++++++++++++++++++ writers/mixed_batch_test.go | 69 ++++++++++++++++++++++++++++++++ 3 files changed, 154 insertions(+) create mode 100644 writers/mixed_batch.go create mode 100644 writers/mixed_batch_test.go diff --git a/writers/batch.go b/writers/batch.go index 186643aaf0..1ee914d52f 100644 --- a/writers/batch.go +++ b/writers/batch.go @@ -9,10 +9,15 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/util" "github.com/cloudquery/plugin-sdk/v4/internal/pk" + "github.com/cloudquery/plugin-sdk/v4/plugin" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/rs/zerolog" ) +type Writer interface { + Write(ctx context.Context, res <-chan plugin.Message) error +} + const ( defaultBatchTimeoutSeconds = 20 defaultBatchSize = 10000 diff --git a/writers/mixed_batch.go b/writers/mixed_batch.go new file mode 100644 index 0000000000..547b05e971 --- /dev/null +++ b/writers/mixed_batch.go @@ -0,0 +1,80 @@ +package writers + +import ( + "context" + "sync" + "time" + + "github.com/cloudquery/plugin-sdk/v4/plugin" + "github.com/cloudquery/plugin-sdk/v4/schema" + "github.com/rs/zerolog" +) + +// MixedBatchClient is a client that will receive batches of messages for a mixture of tables. +type MixedBatchClient interface { + CreateTableBatch(ctx context.Context, resources []plugin.MessageCreateTable) error + InsertBatch(ctx context.Context, resources []plugin.MessageInsert) error + DeleteStaleBatch(ctx context.Context, resources []plugin.MessageDeleteStale) error +} + +type MixedBatchWriter struct { + tables schema.Tables + client MixedBatchClient + workers map[string]*worker + workersLock *sync.Mutex + + logger zerolog.Logger + batchTimeout time.Duration + batchSize int + batchSizeBytes int +} + +// Assert at compile-time that MixedBatchWriter implements the Writer interface +var _ Writer = (*MixedBatchWriter)(nil) + +type MixedBatchWriterOption func(writer *MixedBatchWriter) + +func WithMixedBatchWriterLogger(logger zerolog.Logger) MixedBatchWriterOption { + return func(p *MixedBatchWriter) { + p.logger = logger + } +} + +func WithMixedBatchWriterBatchTimeout(timeout time.Duration) MixedBatchWriterOption { + return func(p *MixedBatchWriter) { + p.batchTimeout = timeout + } +} + +func WithMixedBatchWriterBatchSize(size int) MixedBatchWriterOption { + return func(p *MixedBatchWriter) { + p.batchSize = size + } +} + +func WithMixedBatchWriterSizeBytes(size int) MixedBatchWriterOption { + return func(p *MixedBatchWriter) { + p.batchSizeBytes = size + } +} + +func NewMixedBatchWriter(tables schema.Tables, client MixedBatchClient, opts ...MixedBatchWriterOption) (*MixedBatchWriter, error) { + c := &MixedBatchWriter{ + tables: tables, + client: client, + workers: make(map[string]*worker), + workersLock: &sync.Mutex{}, + logger: zerolog.Nop(), + batchTimeout: defaultBatchTimeoutSeconds * time.Second, + batchSize: defaultBatchSize, + batchSizeBytes: defaultBatchSizeBytes, + } + for _, opt := range opts { + opt(c) + } + return c, nil +} + +func (c *MixedBatchWriter) Write(ctx context.Context, res <-chan plugin.Message) error { + return nil // TODO +} diff --git a/writers/mixed_batch_test.go b/writers/mixed_batch_test.go new file mode 100644 index 0000000000..05c505325f --- /dev/null +++ b/writers/mixed_batch_test.go @@ -0,0 +1,69 @@ +package writers + +import ( + "context" + "testing" + + "github.com/apache/arrow/go/v13/arrow" + "github.com/apache/arrow/go/v13/arrow/array" + "github.com/apache/arrow/go/v13/arrow/memory" + "github.com/cloudquery/plugin-sdk/v4/plugin" + "github.com/cloudquery/plugin-sdk/v4/schema" +) + +type testMixedBatchClient struct { +} + +func (c *testMixedBatchClient) CreateTableBatch(ctx context.Context, resources []plugin.MessageCreateTable) error { + return nil +} + +func (c *testMixedBatchClient) InsertBatch(ctx context.Context, resources []plugin.MessageInsert) error { + return nil +} + +func (c *testMixedBatchClient) DeleteStaleBatch(ctx context.Context, resources []plugin.MessageDeleteStale) error { + return nil +} + +func TestMixedBatchWriter(t *testing.T) { + ctx := context.Background() + tables := schema.Tables{ + { + Name: "table1", + Columns: []schema.Column{ + { + Name: "id", + Type: arrow.PrimitiveTypes.Int64, + }, + }, + }, + { + Name: "table2", + Columns: []schema.Column{ + { + Name: "id", + Type: arrow.PrimitiveTypes.Int64, + }, + }, + }, + } + + wr, err := NewMixedBatchWriter(tables, &testMixedBatchClient{}) + if err != nil { + t.Fatal(err) + } + ch := make(chan plugin.Message, 1) + + bldr := array.NewRecordBuilder(memory.DefaultAllocator, tables[0].ToArrowSchema()) + bldr.Field(0).(*array.Int64Builder).Append(1) + rec := bldr.NewRecord() + msg := plugin.MessageInsert{ + Record: rec, + } + ch <- msg + close(ch) + if err := wr.Write(ctx, ch); err != nil { + t.Fatal(err) + } +} From d2de7f1611b5359bf99ead73364d6a5616482667 Mon Sep 17 00:00:00 2001 From: Herman Schaaf Date: Wed, 14 Jun 2023 10:37:10 +0100 Subject: [PATCH 019/125] workers, but probably won't use this --- writers/mixed_batch.go | 188 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 176 insertions(+), 12 deletions(-) diff --git a/writers/mixed_batch.go b/writers/mixed_batch.go index 547b05e971..c81c4008c5 100644 --- a/writers/mixed_batch.go +++ b/writers/mixed_batch.go @@ -2,31 +2,43 @@ package writers import ( "context" + "reflect" "sync" "time" + "github.com/apache/arrow/go/v13/arrow/util" "github.com/cloudquery/plugin-sdk/v4/plugin" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/rs/zerolog" ) -// MixedBatchClient is a client that will receive batches of messages for a mixture of tables. +const ( + msgTypeCreateTable = iota + msgTypeInsert + msgTypeDeleteStale +) + +var allMsgTypes = []int{msgTypeCreateTable, msgTypeInsert, msgTypeDeleteStale} + +// MixedBatchClient is a client that will receive batches of messages with a mixture of tables. type MixedBatchClient interface { - CreateTableBatch(ctx context.Context, resources []plugin.MessageCreateTable) error - InsertBatch(ctx context.Context, resources []plugin.MessageInsert) error - DeleteStaleBatch(ctx context.Context, resources []plugin.MessageDeleteStale) error + CreateTableBatch(ctx context.Context, messages []plugin.MessageCreateTable) error + InsertBatch(ctx context.Context, messages []plugin.MessageInsert) error + DeleteStaleBatch(ctx context.Context, messages []plugin.MessageDeleteStale) error } type MixedBatchWriter struct { - tables schema.Tables - client MixedBatchClient - workers map[string]*worker - workersLock *sync.Mutex - + tables schema.Tables + client MixedBatchClient logger zerolog.Logger batchTimeout time.Duration batchSize int batchSizeBytes int + + workerCreateTable *mixedBatchWorker[plugin.MessageCreateTable] + workerInsert *mixedBatchWorker[plugin.MessageInsert] + workerDeleteStale *mixedBatchWorker[plugin.MessageDeleteStale] + workersLock *sync.Mutex } // Assert at compile-time that MixedBatchWriter implements the Writer interface @@ -58,16 +70,86 @@ func WithMixedBatchWriterSizeBytes(size int) MixedBatchWriterOption { } } +type mixedBatchWorker[T plugin.Message] struct { + count int + wg *sync.WaitGroup + ch chan T + flush chan chan bool + messages []T + writeFunc func(ctx context.Context, messages []T) error +} + +func newWorker[T plugin.Message](writeFunc func(ctx context.Context, messages []T) error) *mixedBatchWorker[T] { + w := &mixedBatchWorker[T]{ + writeFunc: writeFunc, + messages: make([]T, 0, defaultBatchSize), + count: 0, + ch: make(chan T), + wg: &sync.WaitGroup{}, + } + return w +} + +func (w *mixedBatchWorker[T]) listen(ctx context.Context, ch <-chan T) chan chan bool { + flush := make(chan chan bool, 1) + w.wg.Add(1) + go func() { + defer w.wg.Done() + w.start(ctx, ch, flush) + }() + return flush +} + +func (w *mixedBatchWorker[T]) start(ctx context.Context, ch <-chan T, flush chan chan bool) { + sizeBytes := int64(0) + messages := make([]T, 0) + + for { + select { + case msg, ok := <-ch: + if !ok { + if len(messages) > 0 { + w.writeFunc(ctx, messages) + } + return + } + if uint64(len(messages)) == 1000 || sizeBytes+util.TotalRecordSize(r) > int64(1000) { + w.writeFunc(ctx, messages) + messages = make([]T, 0) + sizeBytes = 0 + } + messages = append(messages, msg) + sizeBytes += util.TotalRecordSize(msg) + case <-time.After(w.batchTimeout): + if len(messages) > 0 { + w.writeFunc(ctx, messages) + messages = make([]T, 0) + sizeBytes = 0 + } + case done := <-flush: + if len(messages) > 0 { + w.writeFunc(ctx, messages) + messages = make([]T, 0) + sizeBytes = 0 + } + done <- true + } + } +} + func NewMixedBatchWriter(tables schema.Tables, client MixedBatchClient, opts ...MixedBatchWriterOption) (*MixedBatchWriter, error) { c := &MixedBatchWriter{ tables: tables, client: client, - workers: make(map[string]*worker), workersLock: &sync.Mutex{}, logger: zerolog.Nop(), batchTimeout: defaultBatchTimeoutSeconds * time.Second, batchSize: defaultBatchSize, batchSizeBytes: defaultBatchSizeBytes, + + workerCreateTable: newWorker[plugin.MessageCreateTable](client.CreateTableBatch), + workerInsert: newWorker[plugin.MessageInsert](client.InsertBatch), + workerDeleteStale: newWorker[plugin.MessageDeleteStale](client.DeleteStaleBatch), } for _, opt := range opts { opt(c) @@ -75,6 +157,88 @@ func NewMixedBatchWriter(tables schema.Tables, client MixedBatchClient, opts ... return c, nil } -func (c *MixedBatchWriter) Write(ctx context.Context, res <-chan plugin.Message) error { - return nil // TODO +// Write starts listening for messages on the msgChan channel and writes them to the client in batches. +func (w *MixedBatchWriter) Write(ctx context.Context, msgChan <-chan plugin.Message) error { + w.workersLock.Lock() + flushCreateTable := w.workerCreateTable.listen(ctx, msgChan) + flushInsert := w.workerInsert.listen(ctx, msgChan) + flushDeleteStale := w.workerDeleteStale.listen(ctx, msgChan) + w.workersLock.Unlock() + + done := make(chan bool) + for msg := range msgChan { + switch v := msg.(type) { + case plugin.MessageCreateTable: + w.workerCreateTable.ch <- v + case plugin.MessageInsert: + flushCreateTable <- done + <-done + flushDeleteStale <- done + <-done + w.workerInsert.ch <- v + case plugin.MessageDeleteStale: + flushCreateTable <- done + <-done + flushInsert <- done + <-done + w.workerDeleteStale.ch <- v + } + } + + flushCreateTable <- done + <-done + + flushInsert <- done + <-done + + flushDeleteStale <- done + <-done + + w.workersLock.Lock() + close(w.workerCreateTable.ch) + close(w.workerInsert.ch) + close(w.workerDeleteStale.ch) + + w.workersLock.Unlock() + return nil +} + +func (w *MixedBatchWriter) flush(ctx context.Context, messageID int, messages []plugin.Message) error { + var err error + switch messageID { + case msgTypeCreateTable: + msgs := make([]plugin.MessageCreateTable, len(messages)) + for i := range messages { + msgs[i] = messages[i].(plugin.MessageCreateTable) + } + err = w.client.CreateTableBatch(ctx, msgs) + case msgTypeInsert: + // TODO: should we remove duplicates here? + w.writeInsert(ctx, messages) + case msgTypeDeleteStale: + w.writeDeleteStale(ctx, messages) + } + if err != nil { + + } + start := time.Now() + batchSize := len(resources) + if err := w.client.WriteTableBatch(ctx, table, resources); err != nil { + w.logger.Err(err).Str("table", table.Name).Int("len", batchSize).Dur("duration", time.Since(start)).Msg("failed to write batch") + } else { + w.logger.Info().Str("table", table.Name).Int("len", batchSize).Dur("duration", time.Since(start)).Msg("batch written successfully") + } +} + +func messageID(msg plugin.Message) int { + switch msg.(type) { + case plugin.MessageCreateTable: + return msgTypeCreateTable + case plugin.MessageInsert: + return msgTypeInsert + case plugin.MessageDeleteStale: + return msgTypeDeleteStale + default: + panic("unknown message type: " + reflect.TypeOf(msg).String()) + } } From 1b79d012614a945052a4a180f37bce27a7b3ec68 Mon Sep 17 00:00:00 2001 From: Herman Schaaf Date: Wed, 14 Jun 2023 12:16:20 +0100 Subject: [PATCH 020/125] Simplified mixed batch writer --- writers/mixed_batch.go | 244 +++++++++++++++--------------------- writers/mixed_batch_test.go | 189 +++++++++++++++++++++++----- 2 files changed, 261 insertions(+), 172 deletions(-) diff --git a/writers/mixed_batch.go b/writers/mixed_batch.go index c81c4008c5..d3295864ea 100644 --- a/writers/mixed_batch.go +++ b/writers/mixed_batch.go @@ -2,8 +2,6 @@ package writers import ( "context" - "reflect" - "sync" "time" "github.com/apache/arrow/go/v13/arrow/util" @@ -34,11 +32,6 @@ type MixedBatchWriter struct { batchTimeout time.Duration batchSize int batchSizeBytes int - - workerCreateTable *mixedBatchWorker[plugin.MessageCreateTable] - workerInsert *mixedBatchWorker[plugin.MessageInsert] - workerDeleteStale *mixedBatchWorker[plugin.MessageDeleteStale] - workersLock *sync.Mutex } // Assert at compile-time that MixedBatchWriter implements the Writer interface @@ -70,86 +63,14 @@ func WithMixedBatchWriterSizeBytes(size int) MixedBatchWriterOption { } } -type mixedBatchWorker[T plugin.Message] struct { - count int - wg *sync.WaitGroup - ch chan T - flush chan chan bool - messages []T - writeFunc func(ctx context.Context, messages []T) error -} - -func newWorker[T plugin.Message](writeFunc func(ctx context.Context, messages []T) error) *mixedBatchWorker[T] { - w := &mixedBatchWorker[T]{ - writeFunc: writeFunc, - messages: make([]T, 0, defaultBatchSize), - count: 0, - ch: make(chan T), - wg: &sync.WaitGroup{}, - } - return w -} - -func (w *mixedBatchWorker[T]) listen(ctx context.Context, ch <-chan T) chan chan bool { - flush := make(chan chan bool, 1) - w.wg.Add(1) - go func() { - defer w.wg.Done() - w.start(ctx, ch, flush) - }() - return flush -} - -func (w *mixedBatchWorker[T]) start(ctx context.Context, ch <-chan T, flush chan chan bool) { - sizeBytes := int64(0) - messages := make([]T, 0) - - for { - select { - case msg, ok := <-ch: - if !ok { - if len(messages) > 0 { - w.writeFunc(ctx, messages) - } - return - } - if uint64(len(messages)) == 1000 || sizeBytes+util.TotalRecordSize(r) > int64(1000) { - w.writeFunc(ctx, messages) - messages = make([]T, 0) - sizeBytes = 0 - } - messages = append(messages, msg) - sizeBytes += util.TotalRecordSize(msg) - case <-time.After(w.batchTimeout): - if len(messages) > 0 { - w.writeFunc(ctx, messages) - messages = make([]T, 0) - sizeBytes = 0 - } - case done := <-flush: - if len(messages) > 0 { - w.writeFunc(ctx, messages) - messages = make([]T, 0) - sizeBytes = 0 - } - done <- true - } - } -} - func NewMixedBatchWriter(tables schema.Tables, client MixedBatchClient, opts ...MixedBatchWriterOption) (*MixedBatchWriter, error) { c := &MixedBatchWriter{ tables: tables, client: client, - workersLock: &sync.Mutex{}, logger: zerolog.Nop(), batchTimeout: defaultBatchTimeoutSeconds * time.Second, batchSize: defaultBatchSize, batchSizeBytes: defaultBatchSizeBytes, - - workerCreateTable: newWorker[plugin.MessageCreateTable](client.CreateTableBatch), - workerInsert: newWorker[plugin.MessageInsert](client.InsertBatch), - workerDeleteStale: newWorker[plugin.MessageDeleteStale](client.DeleteStaleBatch), } for _, opt := range opts { opt(c) @@ -157,88 +78,129 @@ func NewMixedBatchWriter(tables schema.Tables, client MixedBatchClient, opts ... return c, nil } +func msgID(msg plugin.Message) int { + switch msg.(type) { + case plugin.MessageCreateTable: + return msgTypeCreateTable + case plugin.MessageInsert: + return msgTypeInsert + case plugin.MessageDeleteStale: + return msgTypeDeleteStale + } + panic("unknown message type") +} + // Write starts listening for messages on the msgChan channel and writes them to the client in batches. func (w *MixedBatchWriter) Write(ctx context.Context, msgChan <-chan plugin.Message) error { - w.workersLock.Lock() - flushCreateTable := w.workerCreateTable.listen(ctx, msgChan) - flushInsert := w.workerInsert.listen(ctx, msgChan) - flushDeleteStale := w.workerDeleteStale.listen(ctx, msgChan) - w.workersLock.Unlock() - - done := make(chan bool) + createTable := &batchManager[plugin.MessageCreateTable]{ + batch: make([]plugin.MessageCreateTable, 0, w.batchSize), + writeFunc: w.client.CreateTableBatch, + } + insert := &insertBatchManager{ + batch: make([]plugin.MessageInsert, 0, w.batchSize), + writeFunc: w.client.InsertBatch, + maxBatchSizeBytes: int64(w.batchSizeBytes), + } + deleteStale := &batchManager[plugin.MessageDeleteStale]{ + batch: make([]plugin.MessageDeleteStale, 0, w.batchSize), + writeFunc: w.client.DeleteStaleBatch, + } + flush := func(msgType int) error { + switch msgType { + case msgTypeCreateTable: + return createTable.flush(ctx) + case msgTypeInsert: + return insert.flush(ctx) + case msgTypeDeleteStale: + return deleteStale.flush(ctx) + default: + panic("unknown message type") + } + } + prevMsgType := -1 + var err error for msg := range msgChan { + msgType := msgID(msg) + if prevMsgType != -1 && prevMsgType != msgType { + if err := flush(prevMsgType); err != nil { + return err + } + } + prevMsgType = msgType switch v := msg.(type) { case plugin.MessageCreateTable: - w.workerCreateTable.ch <- v + err = createTable.append(ctx, v) case plugin.MessageInsert: - flushCreateTable <- done - <-done - flushDeleteStale <- done - <-done - w.workerInsert.ch <- v + err = insert.append(ctx, v) case plugin.MessageDeleteStale: - flushCreateTable <- done - <-done - flushInsert <- done - <-done - w.workerDeleteStale.ch <- v + err = deleteStale.append(ctx, v) + default: + panic("unknown message type") + } + if err != nil { + return err } } + return flush(prevMsgType) +} - flushCreateTable <- done - <-done - - flushInsert <- done - <-done +// generic batch manager for most message types +type batchManager[T plugin.Message] struct { + batch []T + writeFunc func(ctx context.Context, messages []T) error +} - flushDeleteStale <- done - <-done +func (m *batchManager[T]) append(ctx context.Context, msg T) error { + if len(m.batch) == cap(m.batch) { + if err := m.flush(ctx); err != nil { + return err + } + } + m.batch = append(m.batch, msg) + return nil +} - w.workersLock.Lock() - close(w.workerCreateTable.ch) - close(w.workerInsert.ch) - close(w.workerDeleteStale.ch) +func (m *batchManager[T]) flush(ctx context.Context) error { + if len(m.batch) == 0 { + return nil + } - w.workersLock.Unlock() + err := m.writeFunc(ctx, m.batch) + if err != nil { + return err + } + m.batch = m.batch[:0] return nil } -func (w *MixedBatchWriter) flush(ctx context.Context, messageID int, messages []plugin.Message) error { - var err error - switch messageID { - case msgTypeCreateTable: - msgs := make([]plugin.MessageCreateTable, len(messages)) - for i := range messages { - msgs[i] = messages[i].(plugin.MessageCreateTable) +// special batch manager for insert messages that also keeps track of the total size of the batch +type insertBatchManager struct { + batch []plugin.MessageInsert + writeFunc func(ctx context.Context, messages []plugin.MessageInsert) error + curBatchSizeBytes int64 + maxBatchSizeBytes int64 +} + +func (m *insertBatchManager) append(ctx context.Context, msg plugin.MessageInsert) error { + if len(m.batch) == cap(m.batch) || m.curBatchSizeBytes+util.TotalRecordSize(msg.Record) > m.maxBatchSizeBytes { + if err := m.flush(ctx); err != nil { + return err } - err = w.client.CreateTableBatch(ctx, msgs) - case msgTypeInsert: - // TODO: should we remove duplicates here? - w.writeInsert(ctx, messages) - case msgTypeDeleteStale: - w.writeDeleteStale(ctx, messages) } - if err != nil { + m.batch = append(m.batch, msg) + m.curBatchSizeBytes += util.TotalRecordSize(msg.Record) + return nil +} +func (m *insertBatchManager) flush(ctx context.Context) error { + if len(m.batch) == 0 { + return nil } - start := time.Now() - batchSize := len(resources) - if err := w.client.WriteTableBatch(ctx, table, resources); err != nil { - w.logger.Err(err).Str("table", table.Name).Int("len", batchSize).Dur("duration", time.Since(start)).Msg("failed to write batch") - } else { - w.logger.Info().Str("table", table.Name).Int("len", batchSize).Dur("duration", time.Since(start)).Msg("batch written successfully") - } -} -func messageID(msg plugin.Message) int { - switch msg.(type) { - case plugin.MessageCreateTable: - return msgTypeCreateTable - case plugin.MessageInsert: - return msgTypeInsert - case plugin.MessageDeleteStale: - return msgTypeDeleteStale - default: - panic("unknown message type: " + reflect.TypeOf(msg).String()) + err := m.writeFunc(ctx, m.batch) + if err != nil { + return err } + m.batch = m.batch[:0] + return nil } diff --git a/writers/mixed_batch_test.go b/writers/mixed_batch_test.go index 05c505325f..620e99c2ed 100644 --- a/writers/mixed_batch_test.go +++ b/writers/mixed_batch_test.go @@ -3,6 +3,7 @@ package writers import ( "context" "testing" + "time" "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/array" @@ -12,58 +13,184 @@ import ( ) type testMixedBatchClient struct { + receivedBatches [][]plugin.Message } -func (c *testMixedBatchClient) CreateTableBatch(ctx context.Context, resources []plugin.MessageCreateTable) error { +func (c *testMixedBatchClient) CreateTableBatch(ctx context.Context, msgs []plugin.MessageCreateTable) error { + m := make([]plugin.Message, len(msgs)) + for i, msg := range msgs { + m[i] = msg + } + c.receivedBatches = append(c.receivedBatches, m) return nil } -func (c *testMixedBatchClient) InsertBatch(ctx context.Context, resources []plugin.MessageInsert) error { +func (c *testMixedBatchClient) InsertBatch(ctx context.Context, msgs []plugin.MessageInsert) error { + m := make([]plugin.Message, len(msgs)) + for i, msg := range msgs { + m[i] = msg + } + c.receivedBatches = append(c.receivedBatches, m) return nil } -func (c *testMixedBatchClient) DeleteStaleBatch(ctx context.Context, resources []plugin.MessageDeleteStale) error { +func (c *testMixedBatchClient) DeleteStaleBatch(ctx context.Context, msgs []plugin.MessageDeleteStale) error { + m := make([]plugin.Message, len(msgs)) + for i, msg := range msgs { + m[i] = msg + } + c.receivedBatches = append(c.receivedBatches, m) return nil } func TestMixedBatchWriter(t *testing.T) { ctx := context.Background() - tables := schema.Tables{ - { - Name: "table1", - Columns: []schema.Column{ - { - Name: "id", - Type: arrow.PrimitiveTypes.Int64, - }, + + // message to create table1 + table1 := &schema.Table{ + Name: "table1", + Columns: []schema.Column{ + { + Name: "id", + Type: arrow.PrimitiveTypes.Int64, }, }, - { - Name: "table2", - Columns: []schema.Column{ - { - Name: "id", - Type: arrow.PrimitiveTypes.Int64, - }, + } + msgCreateTable1 := plugin.MessageCreateTable{ + Table: table1, + MigrateForce: false, + } + + // message to create table2 + table2 := &schema.Table{ + Name: "table2", + Columns: []schema.Column{ + { + Name: "id", + Type: arrow.PrimitiveTypes.Int64, }, }, } + msgCreateTable2 := plugin.MessageCreateTable{ + Table: table2, + MigrateForce: false, + } + + // message to insert into table1 + bldr1 := array.NewRecordBuilder(memory.DefaultAllocator, table1.ToArrowSchema()) + bldr1.Field(0).(*array.Int64Builder).Append(1) + rec1 := bldr1.NewRecord() + msgInsertTable1 := plugin.MessageInsert{ + Record: rec1, + } + + // message to insert into table2 + bldr2 := array.NewRecordBuilder(memory.DefaultAllocator, table1.ToArrowSchema()) + bldr2.Field(0).(*array.Int64Builder).Append(1) + rec2 := bldr2.NewRecord() + msgInsertTable2 := plugin.MessageInsert{ + Record: rec2, + Upsert: false, + } - wr, err := NewMixedBatchWriter(tables, &testMixedBatchClient{}) - if err != nil { - t.Fatal(err) + // message to delete stale from table1 + msgDeleteStale1 := plugin.MessageDeleteStale{ + Table: table1, + SourceName: "my-source", + SyncTime: time.Now(), + } + msgDeleteStale2 := plugin.MessageDeleteStale{ + Table: table1, + SourceName: "my-source", + SyncTime: time.Now(), } - ch := make(chan plugin.Message, 1) - bldr := array.NewRecordBuilder(memory.DefaultAllocator, tables[0].ToArrowSchema()) - bldr.Field(0).(*array.Int64Builder).Append(1) - rec := bldr.NewRecord() - msg := plugin.MessageInsert{ - Record: rec, + testCases := []struct { + name string + messages []plugin.Message + wantBatches [][]plugin.Message + }{ + { + name: "create table, insert, delete stale", + messages: []plugin.Message{ + msgCreateTable1, + msgCreateTable2, + msgInsertTable1, + msgInsertTable2, + msgDeleteStale1, + msgDeleteStale2, + }, + wantBatches: [][]plugin.Message{ + {msgCreateTable1, msgCreateTable2}, + {msgInsertTable1, msgInsertTable2}, + {msgDeleteStale1, msgDeleteStale2}, + }, + }, + { + name: "interleaved messages", + messages: []plugin.Message{ + msgCreateTable1, + msgInsertTable1, + msgDeleteStale1, + msgCreateTable2, + msgInsertTable2, + msgDeleteStale2, + }, + wantBatches: [][]plugin.Message{ + {msgCreateTable1}, + {msgInsertTable1}, + {msgDeleteStale1}, + {msgCreateTable2}, + {msgInsertTable2}, + {msgDeleteStale2}, + }, + }, + { + name: "interleaved messages", + messages: []plugin.Message{ + msgCreateTable1, + msgCreateTable2, + msgInsertTable1, + msgDeleteStale2, + msgInsertTable2, + msgDeleteStale1, + }, + wantBatches: [][]plugin.Message{ + {msgCreateTable1, msgCreateTable2}, + {msgInsertTable1}, + {msgDeleteStale2}, + {msgInsertTable2}, + {msgDeleteStale1}, + }, + }, } - ch <- msg - close(ch) - if err := wr.Write(ctx, ch); err != nil { - t.Fatal(err) + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + tables := schema.Tables([]*schema.Table{table1, table2}) + client := &testMixedBatchClient{ + receivedBatches: make([][]plugin.Message, 0), + } + wr, err := NewMixedBatchWriter(tables, client) + if err != nil { + t.Fatal(err) + } + ch := make(chan plugin.Message, len(tc.messages)) + for _, msg := range tc.messages { + ch <- msg + } + close(ch) + if err := wr.Write(ctx, ch); err != nil { + t.Fatal(err) + } + if len(client.receivedBatches) != len(tc.wantBatches) { + t.Fatalf("got %d batches, want %d", len(client.receivedBatches), len(tc.wantBatches)) + } + for i, wantBatch := range tc.wantBatches { + if len(client.receivedBatches[i]) != len(wantBatch) { + t.Fatalf("got %d messages in batch %d, want %d", len(client.receivedBatches[i]), i, len(wantBatch)) + } + } + }) } } From b0aa42973d9f19e800c2094c582c63aef2d9ddf5 Mon Sep 17 00:00:00 2001 From: Herman Schaaf Date: Thu, 15 Jun 2023 10:22:45 +0100 Subject: [PATCH 021/125] Update New --- writers/mixed_batch.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/writers/mixed_batch.go b/writers/mixed_batch.go index d3295864ea..d668706862 100644 --- a/writers/mixed_batch.go +++ b/writers/mixed_batch.go @@ -63,9 +63,8 @@ func WithMixedBatchWriterSizeBytes(size int) MixedBatchWriterOption { } } -func NewMixedBatchWriter(tables schema.Tables, client MixedBatchClient, opts ...MixedBatchWriterOption) (*MixedBatchWriter, error) { +func NewMixedBatchWriter(client MixedBatchClient, opts ...MixedBatchWriterOption) (*MixedBatchWriter, error) { c := &MixedBatchWriter{ - tables: tables, client: client, logger: zerolog.Nop(), batchTimeout: defaultBatchTimeoutSeconds * time.Second, From 69525b4d35efc8fa71ce550df110fff6a6cceaf3 Mon Sep 17 00:00:00 2001 From: Herman Schaaf Date: Thu, 15 Jun 2023 10:22:58 +0100 Subject: [PATCH 022/125] Add GetTables() to Message interface --- plugin/messages.go | 17 ++++++++++++++++- plugin/plugin.go | 2 +- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/plugin/messages.go b/plugin/messages.go index 43e3eedacb..3cae546e58 100644 --- a/plugin/messages.go +++ b/plugin/messages.go @@ -7,16 +7,29 @@ import ( "github.com/cloudquery/plugin-sdk/v4/schema" ) +type Message interface { + GetTable() *schema.Table +} + type MessageCreateTable struct { Table *schema.Table MigrateForce bool } +func (m MessageCreateTable) GetTable() *schema.Table { + return m.Table +} + type MessageInsert struct { + Table *schema.Table Record arrow.Record Upsert bool } +func (m MessageInsert) GetTable() *schema.Table { + return m.Table +} + // MessageDeleteStale is a pretty specific message which requires the destination to be aware of a CLI use-case // thus it might be deprecated in the future // in favour of MessageDelete or MessageRawQuery @@ -27,7 +40,9 @@ type MessageDeleteStale struct { SyncTime time.Time } -type Message any +func (m MessageDeleteStale) GetTable() *schema.Table { + return m.Table +} type Messages []Message diff --git a/plugin/plugin.go b/plugin/plugin.go index b583e86811..0502aea231 100644 --- a/plugin/plugin.go +++ b/plugin/plugin.go @@ -75,7 +75,7 @@ func maxDepth(tables schema.Tables) uint64 { } // NewPlugin returns a new CloudQuery Plugin with the given name, version and implementation. -// Depending on the options, it can be write only plugin, read only plugin or both. +// Depending on the options, it can be a write-only plugin, read-only plugin, or both. func NewPlugin(name string, version string, newClient NewClientFunc, options ...Option) *Plugin { p := Plugin{ name: name, From 4b31116ba4dd6c2ca0f8404ed83a2a278a3cd100 Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Thu, 15 Jun 2023 10:41:55 +0300 Subject: [PATCH 023/125] fix batchwriter --- plugin/messages.go | 27 +++++ writers/batch.go | 259 ++++++++++++++++++++++++++++-------------- writers/batch_test.go | 190 +++++++++++++++++++++++++++---- 3 files changed, 367 insertions(+), 109 deletions(-) diff --git a/plugin/messages.go b/plugin/messages.go index 3cae546e58..28a9acc019 100644 --- a/plugin/messages.go +++ b/plugin/messages.go @@ -46,6 +46,10 @@ func (m MessageDeleteStale) GetTable() *schema.Table { type Messages []Message +type CreateTables []*MessageCreateTable + +type Inserts []*MessageInsert + func (messages Messages) InsertItems() int64 { items := int64(0) for _, msg := range messages { @@ -56,3 +60,26 @@ func (messages Messages) InsertItems() int64 { } return items } + +func (m CreateTables) Exists(tableName string) bool { + for _, table := range m { + if table.Table.Name == tableName { + return true + } + } + return false +} + +func (m Inserts) Exists(tableName string) bool { + for _, insert := range m { + md := insert.Record.Schema().Metadata() + tableNameMeta, ok := md.GetValue(schema.MetadataTableName) + if !ok { + continue + } + if tableNameMeta == tableName { + return true + } + } + return false +} diff --git a/writers/batch.go b/writers/batch.go index 1ee914d52f..6c67f99549 100644 --- a/writers/batch.go +++ b/writers/batch.go @@ -12,6 +12,7 @@ import ( "github.com/cloudquery/plugin-sdk/v4/plugin" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/rs/zerolog" + "golang.org/x/sync/semaphore" ) type Writer interface { @@ -20,19 +21,25 @@ type Writer interface { const ( defaultBatchTimeoutSeconds = 20 + defaultMaxWorkers = int64(10000) defaultBatchSize = 10000 defaultBatchSizeBytes = 5 * 1024 * 1024 // 5 MiB ) type BatchWriterClient interface { - WriteTableBatch(ctx context.Context, table *schema.Table, resources []arrow.Record) error + CreateTables(context.Context, []*plugin.MessageCreateTable) error + WriteTableBatch(ctx context.Context, name string, upsert bool, msgs []*plugin.MessageInsert) error + DeleteStale(context.Context, []*plugin.MessageDeleteStale) error } type BatchWriter struct { - tables schema.Tables - client BatchWriterClient - workers map[string]*worker - workersLock *sync.Mutex + client BatchWriterClient + semaphore *semaphore.Weighted + workers map[string]*worker + workersLock *sync.RWMutex + workersWaitGroup *sync.WaitGroup + createTableMessages []*plugin.MessageCreateTable + deleteStaleMessages []*plugin.MessageDeleteStale logger zerolog.Logger batchTimeout time.Duration @@ -54,6 +61,12 @@ func WithBatchTimeout(timeout time.Duration) Option { } } +func WithMaxWorkers(n int64) Option { + return func(p *BatchWriter) { + p.semaphore = semaphore.NewWeighted(n) + } +} + func WithBatchSize(size int) Option { return func(p *BatchWriter) { p.batchSize = size @@ -69,56 +82,80 @@ func WithBatchSizeBytes(size int) Option { type worker struct { count int wg *sync.WaitGroup - ch chan arrow.Record + ch chan *plugin.MessageInsert flush chan chan bool } -func NewBatchWriter(tables schema.Tables, client BatchWriterClient, opts ...Option) (*BatchWriter, error) { +func NewBatchWriter(client BatchWriterClient, opts ...Option) (*BatchWriter, error) { c := &BatchWriter{ - tables: tables, - client: client, - workers: make(map[string]*worker), - workersLock: &sync.Mutex{}, - logger: zerolog.Nop(), - batchTimeout: defaultBatchTimeoutSeconds * time.Second, - batchSize: defaultBatchSize, - batchSizeBytes: defaultBatchSizeBytes, + client: client, + workers: make(map[string]*worker), + workersLock: &sync.RWMutex{}, + workersWaitGroup: &sync.WaitGroup{}, + logger: zerolog.Nop(), + batchTimeout: defaultBatchTimeoutSeconds * time.Second, + batchSize: defaultBatchSize, + batchSizeBytes: defaultBatchSizeBytes, + semaphore: semaphore.NewWeighted(defaultMaxWorkers), } for _, opt := range opts { opt(c) } + c.createTableMessages = make([]*plugin.MessageCreateTable, 0, c.batchSize) + c.deleteStaleMessages = make([]*plugin.MessageDeleteStale, 0, c.batchSize) return c, nil } -func (w *BatchWriter) worker(ctx context.Context, table *schema.Table, ch <-chan arrow.Record, flush <-chan chan bool) { +func (w *BatchWriter) Close(ctx context.Context) error { + w.workersLock.Lock() + defer w.workersLock.Unlock() + for _, w := range w.workers { + close(w.ch) + } + w.workersWaitGroup.Wait() + + return nil +} + +func (w *BatchWriter) worker(ctx context.Context, tableName string, ch <-chan *plugin.MessageInsert, flush <-chan chan bool) { sizeBytes := int64(0) - resources := make([]arrow.Record, 0) + resources := make([]*plugin.MessageInsert, 0) + upsertBatch := false for { select { case r, ok := <-ch: if !ok { if len(resources) > 0 { - w.flush(ctx, table, resources) + w.flush(ctx, tableName, upsertBatch, resources) } return } - if uint64(len(resources)) == 1000 || sizeBytes+util.TotalRecordSize(r) > int64(1000) { - w.flush(ctx, table, resources) - resources = make([]arrow.Record, 0) + if upsertBatch != r.Upsert { + w.flush(ctx, tableName, upsertBatch, resources) + resources = make([]*plugin.MessageInsert, 0) + sizeBytes = 0 + upsertBatch = r.Upsert + resources = append(resources, r) + sizeBytes = util.TotalRecordSize(r.Record) + } else { + resources = append(resources, r) + sizeBytes += util.TotalRecordSize(r.Record) + } + if len(resources) >= w.batchSize || sizeBytes+util.TotalRecordSize(r.Record) >= int64(w.batchSizeBytes) { + w.flush(ctx, tableName, upsertBatch, resources) + resources = make([]*plugin.MessageInsert, 0) sizeBytes = 0 } - resources = append(resources, r) - sizeBytes += util.TotalRecordSize(r) case <-time.After(w.batchTimeout): if len(resources) > 0 { - w.flush(ctx, table, resources) - resources = make([]arrow.Record, 0) + w.flush(ctx, tableName, upsertBatch, resources) + resources = make([]*plugin.MessageInsert, 0) sizeBytes = 0 } case done := <-flush: if len(resources) > 0 { - w.flush(ctx, table, resources) - resources = make([]arrow.Record, 0) + w.flush(ctx, tableName, upsertBatch, resources) + resources = make([]*plugin.MessageInsert, 0) sizeBytes = 0 } done <- true @@ -129,14 +166,14 @@ func (w *BatchWriter) worker(ctx context.Context, table *schema.Table, ch <-chan } } -func (w *BatchWriter) flush(ctx context.Context, table *schema.Table, resources []arrow.Record) { - resources = w.removeDuplicatesByPK(table, resources) +func (w *BatchWriter) flush(ctx context.Context, tableName string, upsertBatch bool, resources []*plugin.MessageInsert) { + // resources = w.removeDuplicatesByPK(table, resources) start := time.Now() batchSize := len(resources) - if err := w.client.WriteTableBatch(ctx, table, resources); err != nil { - w.logger.Err(err).Str("table", table.Name).Int("len", batchSize).Dur("duration", time.Since(start)).Msg("failed to write batch") + if err := w.client.WriteTableBatch(ctx, tableName, upsertBatch, resources); err != nil { + w.logger.Err(err).Str("table", tableName).Int("len", batchSize).Dur("duration", time.Since(start)).Msg("failed to write batch") } else { - w.logger.Info().Str("table", table.Name).Int("len", batchSize).Dur("duration", time.Since(start)).Msg("batch written successfully") + w.logger.Info().Str("table", tableName).Int("len", batchSize).Dur("duration", time.Since(start)).Msg("batch written successfully") } } @@ -167,68 +204,122 @@ func (*BatchWriter) removeDuplicatesByPK(table *schema.Table, resources []arrow. return res } -func (w *BatchWriter) Write(ctx context.Context, res <-chan arrow.Record) error { - workers := make(map[string]*worker, len(w.tables)) +func (w *BatchWriter) flushCreateTables(ctx context.Context) error { + if err := w.client.CreateTables(ctx, w.createTableMessages); err != nil { + return err + } + w.createTableMessages = w.createTableMessages[:0] + return nil +} - w.workersLock.Lock() - for _, table := range w.tables { - table := table - if w.workers[table.Name] == nil { - ch := make(chan arrow.Record) - flush := make(chan chan bool) - wg := &sync.WaitGroup{} - w.workers[table.Name] = &worker{ - count: 1, - ch: ch, - flush: flush, - wg: wg, - } - wg.Add(1) - go func() { - defer wg.Done() - w.worker(ctx, table, ch, flush) - }() - } else { - w.workers[table.Name].count++ - } - // we save this locally because we don't want to access the map after that so we can - // keep the workersLock for as short as possible - workers[table.Name] = w.workers[table.Name] +func (w *BatchWriter) flushDeleteStaleTables(ctx context.Context) error { + if err := w.client.DeleteStale(ctx, w.deleteStaleMessages); err != nil { + return err } - w.workersLock.Unlock() + w.deleteStaleMessages = w.deleteStaleMessages[:0] + return nil +} - for r := range res { - tableName, ok := r.Schema().Metadata().GetValue(schema.MetadataTableName) - if !ok { - return fmt.Errorf("missing table name in record metadata") - } - if _, ok := workers[tableName]; !ok { - return fmt.Errorf("table %s not found in destination", tableName) - } - workers[tableName].ch <- r +func (w *BatchWriter) flushInsert(ctx context.Context, tableName string) { + w.workersLock.RLock() + worker, ok := w.workers[tableName] + if !ok { + w.workersLock.RUnlock() + // no tables to flush + return } + w.workersLock.RUnlock() + ch := make(chan bool) + worker.flush <- ch + <-ch +} - // flush and wait for all workers to finish flush before finish and calling delete stale - // This is because destinations can be longed lived and called from multiple sources - flushChannels := make(map[string]chan bool, len(workers)) - for tableName, w := range workers { - flushCh := make(chan bool) - flushChannels[tableName] = flushCh - w.flush <- flushCh +func (w *BatchWriter) writeAll(ctx context.Context, msgs []plugin.Message) error { + ch := make(chan plugin.Message, len(msgs)) + for _, msg := range msgs { + ch <- msg } - for tableName := range flushChannels { - <-flushChannels[tableName] + close(ch) + return w.Write(ctx, ch) +} + +func (w *BatchWriter) Write(ctx context.Context, msgs <-chan plugin.Message) error { + for msg := range msgs { + switch m := msg.(type) { + case *plugin.MessageDeleteStale: + if len(w.createTableMessages) > 0 { + if err := w.flushCreateTables(ctx); err != nil { + return err + } + } + w.flushInsert(ctx, m.Table.Name) + w.deleteStaleMessages = append(w.deleteStaleMessages, m) + if len(w.deleteStaleMessages) > w.batchSize { + if err := w.flushDeleteStaleTables(ctx); err != nil { + return err + } + } + case *plugin.MessageInsert: + if len(w.createTableMessages) > 0 { + if err := w.flushCreateTables(ctx); err != nil { + return err + } + } + if len(w.deleteStaleMessages) > 0 { + if err := w.flushDeleteStaleTables(ctx); err != nil { + return err + } + } + if err := w.startWorker(ctx, m); err != nil { + return err + } + case *plugin.MessageCreateTable: + w.flushInsert(ctx, m.Table.Name) + if len(w.deleteStaleMessages) > 0 { + if err := w.flushDeleteStaleTables(ctx); err != nil { + return err + } + } + w.createTableMessages = append(w.createTableMessages, m) + if len(w.createTableMessages) > w.batchSize { + if err := w.flushCreateTables(ctx); err != nil { + return err + } + } + } } + return nil +} +func (w *BatchWriter) startWorker(ctx context.Context, msg *plugin.MessageInsert) error { + w.workersLock.RLock() + md := msg.Record.Schema().Metadata() + tableName, ok := md.GetValue(schema.MetadataTableName) + if !ok { + w.workersLock.RUnlock() + return fmt.Errorf("table name not found in metadata") + } + wr, ok := w.workers[tableName] + w.workersLock.RUnlock() + if ok { + w.workers[tableName].ch <- msg + return nil + } w.workersLock.Lock() - for tableName := range workers { - w.workers[tableName].count-- - if w.workers[tableName].count == 0 { - close(w.workers[tableName].ch) - w.workers[tableName].wg.Wait() - delete(w.workers, tableName) - } + ch := make(chan *plugin.MessageInsert) + flush := make(chan chan bool) + wr = &worker{ + count: 1, + ch: ch, + flush: flush, } + w.workers[tableName] = wr w.workersLock.Unlock() + w.workersWaitGroup.Add(1) + go func() { + defer w.workersWaitGroup.Done() + w.worker(ctx, tableName, ch, flush) + }() + ch <- msg return nil } diff --git a/writers/batch_test.go b/writers/batch_test.go index dcc38f64aa..cb51311aeb 100644 --- a/writers/batch_test.go +++ b/writers/batch_test.go @@ -3,54 +3,194 @@ package writers import ( "context" "testing" + "time" "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/array" "github.com/apache/arrow/go/v13/arrow/memory" + "github.com/cloudquery/plugin-sdk/v4/plugin" "github.com/cloudquery/plugin-sdk/v4/schema" ) type testBatchClient struct { + createTables []*plugin.MessageCreateTable + inserts []*plugin.MessageInsert + deleteStales []*plugin.MessageDeleteStale } -func (c *testBatchClient) WriteTableBatch(context.Context, *schema.Table, []arrow.Record) error { +func (c *testBatchClient) CreateTables(_ context.Context, msgs []*plugin.MessageCreateTable) error { + c.createTables = append(c.createTables, msgs...) return nil } -func TestBatchWriter(t *testing.T) { - ctx := context.Background() - tables := schema.Tables{ - { - Name: "table1", - Columns: []schema.Column{ - { - Name: "id", - Type: arrow.PrimitiveTypes.Int64, - }, +func (c *testBatchClient) WriteTableBatch(_ context.Context, _ string, _ bool, msgs []*plugin.MessageInsert) error { + c.inserts = append(c.inserts, msgs...) + return nil +} +func (c *testBatchClient) DeleteStale(_ context.Context, msgs []*plugin.MessageDeleteStale) error { + c.deleteStales = append(c.deleteStales, msgs...) + return nil +} + +var batchTestTables = schema.Tables{ + { + Name: "table1", + Columns: []schema.Column{ + { + Name: "id", + Type: arrow.PrimitiveTypes.Int64, }, }, - { - Name: "table2", - Columns: []schema.Column{ - { - Name: "id", - Type: arrow.PrimitiveTypes.Int64, - }, + }, + { + Name: "table2", + Columns: []schema.Column{ + { + Name: "id", + Type: arrow.PrimitiveTypes.Int64, }, }, - } + }, +} + +// TestBatchFlushDifferentMessages tests that if writer receives a message of a new type all other pending +// batches are flushed. +func TestBatchFlushDifferentMessages(t *testing.T) { + ctx := context.Background() - wr, err := NewBatchWriter(tables, &testBatchClient{}) + testClient := &testBatchClient{} + wr, err := NewBatchWriter(testClient) if err != nil { t.Fatal(err) } - ch := make(chan arrow.Record, 1) - bldr := array.NewRecordBuilder(memory.DefaultAllocator, tables[0].ToArrowSchema()) + bldr := array.NewRecordBuilder(memory.DefaultAllocator, batchTestTables[0].ToArrowSchema()) bldr.Field(0).(*array.Int64Builder).Append(1) - ch <- bldr.NewRecord() - close(ch) - if err := wr.Write(ctx, ch); err != nil { + record := bldr.NewRecord() + if err := wr.writeAll(ctx, []plugin.Message{&plugin.MessageCreateTable{Table: batchTestTables[0]}}); err != nil { + t.Fatal(err) + } + if len(testClient.createTables) != 0 { + t.Fatalf("expected 0 create table messages, got %d", len(testClient.createTables)) + } + if err := wr.writeAll(ctx, []plugin.Message{&plugin.MessageInsert{Record: record}}); err != nil { + t.Fatal(err) + } + if len(testClient.createTables) != 1 { + t.Fatalf("expected 1 create table messages, got %d", len(testClient.createTables)) + } + + if len(testClient.inserts) != 0 { + t.Fatalf("expected 0 insert messages, got %d", len(testClient.inserts)) + } + + if err := wr.writeAll(ctx, []plugin.Message{&plugin.MessageCreateTable{Table: batchTestTables[0]}}); err != nil { + t.Fatal(err) + } + + if len(testClient.inserts) != 1 { + t.Fatalf("expected 1 insert messages, got %d", len(testClient.inserts)) + } +} + +func TestBatchSize(t *testing.T) { + ctx := context.Background() + + testClient := &testBatchClient{} + wr, err := NewBatchWriter(testClient, WithBatchSize(2)) + if err != nil { + t.Fatal(err) + } + table := schema.Table{Name: "table1", Columns: []schema.Column{{Name: "id", Type: arrow.PrimitiveTypes.Int64}}} + record := array.NewRecord(table.ToArrowSchema(), nil, 0) + if err := wr.writeAll(ctx, []plugin.Message{&plugin.MessageInsert{ + Record: record, + }}); err != nil { + t.Fatal(err) + } + + if len(testClient.inserts) != 0 { + t.Fatalf("expected 0 create table messages, got %d", len(testClient.inserts)) + } + + if err := wr.writeAll(ctx, []plugin.Message{&plugin.MessageInsert{ + Record: record, + }}); err != nil { t.Fatal(err) } + // we need to wait for the batch to be flushed + time.Sleep(time.Second * 2) + + if len(testClient.inserts) != 2 { + t.Fatalf("expected 2 create table messages, got %d", len(testClient.inserts)) + } +} + +func TestBatchTimeout(t *testing.T) { + ctx := context.Background() + + testClient := &testBatchClient{} + wr, err := NewBatchWriter(testClient, WithBatchTimeout(time.Second)) + if err != nil { + t.Fatal(err) + } + table := schema.Table{Name: "table1", Columns: []schema.Column{{Name: "id", Type: arrow.PrimitiveTypes.Int64}}} + record := array.NewRecord(table.ToArrowSchema(), nil, 0) + if err := wr.writeAll(ctx, []plugin.Message{&plugin.MessageInsert{ + Record: record, + }}); err != nil { + t.Fatal(err) + } + + if len(testClient.inserts) != 0 { + t.Fatalf("expected 0 create table messages, got %d", len(testClient.inserts)) + } + + // we need to wait for the batch to be flushed + time.Sleep(time.Millisecond * 250) + + if len(testClient.inserts) != 0 { + t.Fatalf("expected 0 create table messages, got %d", len(testClient.inserts)) + } + + // we need to wait for the batch to be flushed + time.Sleep(time.Second * 1) + + if len(testClient.inserts) != 1 { + t.Fatalf("expected 1 create table messages, got %d", len(testClient.inserts)) + } +} + +func TestBatchUpserts(t *testing.T) { + ctx := context.Background() + + testClient := &testBatchClient{} + wr, err := NewBatchWriter(testClient) + if err != nil { + t.Fatal(err) + } + table := schema.Table{Name: "table1", Columns: []schema.Column{{Name: "id", Type: arrow.PrimitiveTypes.Int64}}} + record := array.NewRecord(table.ToArrowSchema(), nil, 0) + if err := wr.writeAll(ctx, []plugin.Message{&plugin.MessageInsert{ + Record: record, + Upsert: true, + }}); err != nil { + t.Fatal(err) + } + + if len(testClient.inserts) != 0 { + t.Fatalf("expected 0 create table messages, got %d", len(testClient.inserts)) + } + + if err := wr.writeAll(ctx, []plugin.Message{&plugin.MessageInsert{ + Record: record, + }}); err != nil { + t.Fatal(err) + } + // we need to wait for the batch to be flushed + time.Sleep(time.Second * 2) + + if len(testClient.inserts) != 1 { + t.Fatalf("expected 1 create table messages, got %d", len(testClient.inserts)) + } } From 86e6f6826096a8a03e8e6f6107a163090a240458 Mon Sep 17 00:00:00 2001 From: Herman Schaaf Date: Thu, 15 Jun 2023 10:28:32 +0100 Subject: [PATCH 024/125] Fix tests --- writers/mixed_batch_test.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/writers/mixed_batch_test.go b/writers/mixed_batch_test.go index 620e99c2ed..b99cbcf4c0 100644 --- a/writers/mixed_batch_test.go +++ b/writers/mixed_batch_test.go @@ -167,11 +167,10 @@ func TestMixedBatchWriter(t *testing.T) { for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { - tables := schema.Tables([]*schema.Table{table1, table2}) client := &testMixedBatchClient{ receivedBatches: make([][]plugin.Message, 0), } - wr, err := NewMixedBatchWriter(tables, client) + wr, err := NewMixedBatchWriter(client) if err != nil { t.Fatal(err) } From 61b8fab1981ea88ef97ec714ba3fd5ec6bd1c2a6 Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Thu, 15 Jun 2023 12:53:25 +0300 Subject: [PATCH 025/125] make migrate_force part of write options --- internal/memdb/memdb_test.go | 10 +--- .../servers/destination/v0/destinations.go | 14 ++--- .../servers/destination/v1/destinations.go | 14 ++--- internal/servers/plugin/v3/plugin.go | 19 ++++--- plugin/messages.go | 3 +- plugin/plugin_writer.go | 1 + plugin/testing_write.go | 22 ++++---- plugin/testing_write_migrate.go | 54 +++++++++---------- ..._test.go => destination_v0_test.go.backup} | 0 ..._test.go => destination_v1_test.go.backup} | 0 serve/{docs_test.go => docs_test.go.backup} | 4 ++ serve/plugin_test.go | 11 ++++ writers/mixed_batch_test.go | 6 +-- 13 files changed, 87 insertions(+), 71 deletions(-) rename serve/{destination_v0_test.go => destination_v0_test.go.backup} (100%) rename serve/{destination_v1_test.go => destination_v1_test.go.backup} (100%) rename serve/{docs_test.go => docs_test.go.backup} (82%) diff --git a/internal/memdb/memdb_test.go b/internal/memdb/memdb_test.go index 44a95c6b06..64ebcca892 100644 --- a/internal/memdb/memdb_test.go +++ b/internal/memdb/memdb_test.go @@ -13,17 +13,11 @@ func TestPlugin(t *testing.T) { if err := p.Init(ctx, nil); err != nil { t.Fatal(err) } - plugin.PluginTestSuiteRunner( + plugin.TestWriterSuiteRunner( t, p, plugin.PluginTestSuiteTests{ - MigrateStrategy: plugin.MigrateStrategy{ - AddColumn: plugin.MigrateModeForce, - AddColumnNotNull: plugin.MigrateModeForce, - RemoveColumn: plugin.MigrateModeForce, - RemoveColumnNotNull: plugin.MigrateModeForce, - ChangeColumn: plugin.MigrateModeForce, - }, + NonForceMigrations: plugin.NonForceMigrations{}, }, ) } diff --git a/internal/servers/destination/v0/destinations.go b/internal/servers/destination/v0/destinations.go index ad5506e161..d8c5f85c9d 100644 --- a/internal/servers/destination/v0/destinations.go +++ b/internal/servers/destination/v0/destinations.go @@ -65,12 +65,13 @@ func (s *Server) Migrate(ctx context.Context, req *pb.Migrate_Request) (*pb.Migr writeCh := make(chan plugin.Message) eg, ctx := errgroup.WithContext(ctx) eg.Go(func() error { - return s.Plugin.Write(ctx, plugin.WriteOptions{}, writeCh) + return s.Plugin.Write(ctx, plugin.WriteOptions{ + MigrateForce: s.spec.MigrateMode == specs.MigrateModeForced, + }, writeCh) }) for _, table := range tables { writeCh <- &plugin.MessageCreateTable{ - Table: table, - MigrateForce: s.spec.MigrateMode == specs.MigrateModeForced, + Table: table, } } close(writeCh) @@ -118,13 +119,14 @@ func (s *Server) Write2(msg pb.Destination_Write2Server) error { eg, ctx := errgroup.WithContext(msg.Context()) // sourceName := r.Source eg.Go(func() error { - return s.Plugin.Write(ctx, plugin.WriteOptions{}, msgs) + return s.Plugin.Write(ctx, plugin.WriteOptions{ + MigrateForce: s.spec.MigrateMode == specs.MigrateModeForced, + }, msgs) }) for _, table := range tables { msgs <- &plugin.MessageCreateTable{ - Table: table, - MigrateForce: s.spec.MigrateMode == specs.MigrateModeForced, + Table: table, } } diff --git a/internal/servers/destination/v1/destinations.go b/internal/servers/destination/v1/destinations.go index 0bfdb886ca..50578b5a6f 100644 --- a/internal/servers/destination/v1/destinations.go +++ b/internal/servers/destination/v1/destinations.go @@ -63,12 +63,13 @@ func (s *Server) Migrate(ctx context.Context, req *pb.Migrate_Request) (*pb.Migr writeCh := make(chan plugin.Message) eg, ctx := errgroup.WithContext(ctx) eg.Go(func() error { - return s.Plugin.Write(ctx, plugin.WriteOptions{}, writeCh) + return s.Plugin.Write(ctx, plugin.WriteOptions{ + MigrateForce: s.migrateMode == plugin.MigrateModeForce, + }, writeCh) }) for _, table := range tables { writeCh <- &plugin.MessageCreateTable{ - Table: table, - MigrateForce: s.migrateMode == plugin.MigrateModeForce, + Table: table, } } close(writeCh) @@ -114,13 +115,14 @@ func (s *Server) Write(msg pb.Destination_WriteServer) error { eg, ctx := errgroup.WithContext(msg.Context()) eg.Go(func() error { - return s.Plugin.Write(ctx, plugin.WriteOptions{}, msgs) + return s.Plugin.Write(ctx, plugin.WriteOptions{ + MigrateForce: s.spec.MigrateMode == specs.MigrateModeForced, + }, msgs) }) for _, table := range tables { msgs <- &plugin.MessageCreateTable{ - Table: table, - MigrateForce: s.spec.MigrateMode == specs.MigrateModeForced, + Table: table, } } diff --git a/internal/servers/plugin/v3/plugin.go b/internal/servers/plugin/v3/plugin.go index 64e166e9ec..314475e8fb 100644 --- a/internal/servers/plugin/v3/plugin.go +++ b/internal/servers/plugin/v3/plugin.go @@ -112,8 +112,7 @@ func (s *Server) Sync(req *pb.Sync_Request, stream pb.Plugin_SyncServer) error { m.Table.ToArrowSchema() pbMsg.Message = &pb.Sync_Response_CreateTable{ CreateTable: &pb.MessageCreateTable{ - Table: nil, - MigrateForce: m.MigrateForce, + Table: nil, }, } case *plugin.MessageInsert: @@ -162,10 +161,19 @@ func (s *Server) Sync(req *pb.Sync_Request, stream pb.Plugin_SyncServer) error { func (s *Server) Write(msg pb.Plugin_WriteServer) error { msgs := make(chan plugin.Message) - + r, err := msg.Recv() + if err != nil { + return status.Errorf(codes.Internal, "failed to receive msg: %v", err) + } + pbWriteOptions, ok := r.Message.(*pb.Write_Request_Options) + if !ok { + return status.Errorf(codes.Internal, "expected options message, got %T", r.Message) + } eg, ctx := errgroup.WithContext(msg.Context()) eg.Go(func() error { - return s.Plugin.Write(ctx, plugin.WriteOptions{}, msgs) + return s.Plugin.Write(ctx, plugin.WriteOptions{ + MigrateForce: pbWriteOptions.Options.MigrateForce, + }, msgs) }) for { @@ -194,8 +202,7 @@ func (s *Server) Write(msg pb.Plugin_WriteServer) error { break } pluginMessage = &plugin.MessageCreateTable{ - Table: table, - MigrateForce: pbMsg.CreateTable.MigrateForce, + Table: table, } case *pb.Write_Request_Insert: record, err := schema.NewRecordFromBytes(pbMsg.Insert.Record) diff --git a/plugin/messages.go b/plugin/messages.go index 28a9acc019..ae97f3070f 100644 --- a/plugin/messages.go +++ b/plugin/messages.go @@ -12,8 +12,7 @@ type Message interface { } type MessageCreateTable struct { - Table *schema.Table - MigrateForce bool + Table *schema.Table } func (m MessageCreateTable) GetTable() *schema.Table { diff --git a/plugin/plugin_writer.go b/plugin/plugin_writer.go index 5b5d64912a..4cbf9b55df 100644 --- a/plugin/plugin_writer.go +++ b/plugin/plugin_writer.go @@ -5,6 +5,7 @@ import ( ) type WriteOptions struct { + MigrateForce bool } // this function is currently used mostly for testing so it's not a public api diff --git a/plugin/testing_write.go b/plugin/testing_write.go index 03046d0c87..3d8da11e4c 100644 --- a/plugin/testing_write.go +++ b/plugin/testing_write.go @@ -27,13 +27,13 @@ type WriterTestSuite struct { genDatOptions schema.TestSourceOptions } -// MigrateStrategy defines which tests we should include -type MigrateStrategy struct { - AddColumn MigrateMode - AddColumnNotNull MigrateMode - RemoveColumn MigrateMode - RemoveColumnNotNull MigrateMode - ChangeColumn MigrateMode +// NonForceMigrations defines which migrations are supported by the plugin in non-force mode +type NonForceMigrations struct { + AddColumn bool + AddColumnNotNull bool + RemoveColumn bool + RemoveColumnNotNull bool + ChangeColumn bool } type PluginTestSuiteTests struct { @@ -50,9 +50,9 @@ type PluginTestSuiteTests struct { // SkipMigrate skips testing migration SkipMigrate bool - // MigrateStrategy defines which tests should work with force migration + // NonForceMigrations defines which tests should work with force migration // and which should pass with safe migration - MigrateStrategy MigrateStrategy + NonForceMigrations NonForceMigrations } type NewPluginFunc func() *Plugin @@ -123,7 +123,7 @@ func TestWriterSuiteRunner(t *testing.T, p *Plugin, tests PluginTestSuiteTests, if suite.tests.SkipMigrate { t.Skip("skipping " + t.Name()) } - suite.testMigrate(ctx, t, MigrateModeSafe) - suite.testMigrate(ctx, t, MigrateModeForce) + suite.testMigrate(ctx, t, false) + suite.testMigrate(ctx, t, true) }) } diff --git a/plugin/testing_write_migrate.go b/plugin/testing_write_migrate.go index 13c1fede30..6acff5a17d 100644 --- a/plugin/testing_write_migrate.go +++ b/plugin/testing_write_migrate.go @@ -17,8 +17,10 @@ func tableUUIDSuffix() string { return strings.ReplaceAll(uuid.NewString(), "-", "_") } -func (s *WriterTestSuite) migrate(ctx context.Context, target *schema.Table, source *schema.Table, strategy MigrateMode, mode MigrateMode) error { - if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageCreateTable{ +func (s *WriterTestSuite) migrate(ctx context.Context, target *schema.Table, source *schema.Table, supportNonForce bool, writeOptionMigrateForce bool) error { + if err := s.plugin.writeOne(ctx, WriteOptions{ + writeOptionMigrateForce, + }, &MessageCreateTable{ Table: source, }); err != nil { return fmt.Errorf("failed to create table: %w", err) @@ -35,7 +37,9 @@ func (s *WriterTestSuite) migrate(ctx context.Context, target *schema.Table, sou resource1 := schema.GenTestData(source, opts)[0] - if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageInsert{ + if err := s.plugin.writeOne(ctx, WriteOptions{ + MigrateForce: writeOptionMigrateForce, + }, &MessageInsert{ Record: resource1, }); err != nil { return fmt.Errorf("failed to insert record: %w", err) @@ -52,9 +56,8 @@ func (s *WriterTestSuite) migrate(ctx context.Context, target *schema.Table, sou return fmt.Errorf("expected 1 item, got %d", totalItems) } - if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageCreateTable{ - Table: target, - MigrateForce: strategy == MigrateModeForce, + if err := s.plugin.writeOne(ctx, WriteOptions{MigrateForce: writeOptionMigrateForce}, &MessageCreateTable{ + Table: target, }); err != nil { return fmt.Errorf("failed to create table: %w", err) } @@ -71,7 +74,7 @@ func (s *WriterTestSuite) migrate(ctx context.Context, target *schema.Table, sou if err != nil { return fmt.Errorf("failed to sync: %w", err) } - if strategy == MigrateModeSafe || mode == MigrateModeSafe { + if !writeOptionMigrateForce || supportNonForce { totalItems = messages.InsertItems() if totalItems != 2 { return fmt.Errorf("expected 2 item, got %d", totalItems) @@ -89,12 +92,11 @@ func (s *WriterTestSuite) migrate(ctx context.Context, target *schema.Table, sou func (s *WriterTestSuite) testMigrate( ctx context.Context, t *testing.T, - mode MigrateMode, + forceMigrate bool, ) { t.Run("add_column", func(t *testing.T) { - if s.tests.MigrateStrategy.AddColumn == MigrateModeForce && mode == MigrateModeSafe { - t.Skip("skipping as migrate mode is safe") - return + if !forceMigrate && !s.tests.NonForceMigrations.AddColumn { + t.Skip("skipping test: add_column") } tableName := "add_column_" + tableUUIDSuffix() source := &schema.Table{ @@ -111,15 +113,14 @@ func (s *WriterTestSuite) testMigrate( {Name: "bool", Type: arrow.FixedWidthTypes.Boolean}, }, } - if err := s.migrate(ctx, target, source, s.tests.MigrateStrategy.AddColumn, mode); err != nil { + if err := s.migrate(ctx, target, source, s.tests.NonForceMigrations.AddColumn, forceMigrate); err != nil { t.Fatalf("failed to migrate %s: %v", tableName, err) } }) t.Run("add_column_not_null", func(t *testing.T) { - if s.tests.MigrateStrategy.AddColumnNotNull == MigrateModeForce && mode == MigrateModeSafe { - t.Skip("skipping as migrate mode is safe") - return + if !forceMigrate && !s.tests.NonForceMigrations.AddColumnNotNull { + t.Skip("skipping test: add_column_not_null") } tableName := "add_column_not_null_" + tableUUIDSuffix() source := &schema.Table{ @@ -135,15 +136,14 @@ func (s *WriterTestSuite) testMigrate( {Name: "id", Type: types.ExtensionTypes.UUID}, {Name: "bool", Type: arrow.FixedWidthTypes.Boolean, NotNull: true}, }} - if err := s.migrate(ctx, target, source, s.tests.MigrateStrategy.AddColumnNotNull, mode); err != nil { + if err := s.migrate(ctx, target, source, s.tests.NonForceMigrations.AddColumnNotNull, forceMigrate); err != nil { t.Fatalf("failed to migrate add_column_not_null: %v", err) } }) t.Run("remove_column", func(t *testing.T) { - if s.tests.MigrateStrategy.RemoveColumn == MigrateModeForce && mode == MigrateModeSafe { - t.Skip("skipping as migrate mode is safe") - return + if !forceMigrate && !s.tests.NonForceMigrations.RemoveColumn { + t.Skip("skipping test: remove_column") } tableName := "remove_column_" + tableUUIDSuffix() source := &schema.Table{ @@ -157,15 +157,14 @@ func (s *WriterTestSuite) testMigrate( Columns: schema.ColumnList{ {Name: "id", Type: types.ExtensionTypes.UUID}, }} - if err := s.migrate(ctx, target, source, s.tests.MigrateStrategy.RemoveColumn, mode); err != nil { + if err := s.migrate(ctx, target, source, s.tests.NonForceMigrations.RemoveColumn, forceMigrate); err != nil { t.Fatalf("failed to migrate remove_column: %v", err) } }) t.Run("remove_column_not_null", func(t *testing.T) { - if s.tests.MigrateStrategy.RemoveColumnNotNull == MigrateModeForce && mode == MigrateModeSafe { - t.Skip("skipping as migrate mode is safe") - return + if !forceMigrate && !s.tests.NonForceMigrations.RemoveColumnNotNull { + t.Skip("skipping test: remove_column_not_null") } tableName := "remove_column_not_null_" + tableUUIDSuffix() source := &schema.Table{ @@ -180,15 +179,14 @@ func (s *WriterTestSuite) testMigrate( Columns: schema.ColumnList{ {Name: "id", Type: types.ExtensionTypes.UUID}, }} - if err := s.migrate(ctx, target, source, s.tests.MigrateStrategy.RemoveColumnNotNull, mode); err != nil { + if err := s.migrate(ctx, target, source, s.tests.NonForceMigrations.RemoveColumnNotNull, forceMigrate); err != nil { t.Fatalf("failed to migrate remove_column_not_null: %v", err) } }) t.Run("change_column", func(t *testing.T) { - if s.tests.MigrateStrategy.ChangeColumn == MigrateModeForce && mode == MigrateModeSafe { - t.Skip("skipping as migrate mode is safe") - return + if !forceMigrate && !s.tests.NonForceMigrations.ChangeColumn { + t.Skip("skipping test: change_column") } tableName := "change_column_" + tableUUIDSuffix() source := &schema.Table{ @@ -203,7 +201,7 @@ func (s *WriterTestSuite) testMigrate( {Name: "id", Type: types.ExtensionTypes.UUID}, {Name: "bool", Type: arrow.BinaryTypes.String, NotNull: true}, }} - if err := s.migrate(ctx, target, source, s.tests.MigrateStrategy.ChangeColumn, mode); err != nil { + if err := s.migrate(ctx, target, source, s.tests.NonForceMigrations.ChangeColumn, forceMigrate); err != nil { t.Fatalf("failed to migrate change_column: %v", err) } }) diff --git a/serve/destination_v0_test.go b/serve/destination_v0_test.go.backup similarity index 100% rename from serve/destination_v0_test.go rename to serve/destination_v0_test.go.backup diff --git a/serve/destination_v1_test.go b/serve/destination_v1_test.go.backup similarity index 100% rename from serve/destination_v1_test.go rename to serve/destination_v1_test.go.backup diff --git a/serve/docs_test.go b/serve/docs_test.go.backup similarity index 82% rename from serve/docs_test.go rename to serve/docs_test.go.backup index 296c9d438e..8b5b5b8abb 100644 --- a/serve/docs_test.go +++ b/serve/docs_test.go.backup @@ -1,6 +1,7 @@ package serve import ( + "context" "testing" "github.com/cloudquery/plugin-sdk/v4/internal/memdb" @@ -13,6 +14,9 @@ func TestPluginDocs(t *testing.T) { "testPlugin", "v1.0.0", memdb.NewMemDBClient) + if err := p.Init(context.Background(), nil); err != nil { + t.Fatal(err) + } srv := Plugin(p, WithArgs("doc", tmpDir), WithTestListener()) if err := srv.newCmdPluginDoc().Execute(); err != nil { t.Fatal(err) diff --git a/serve/plugin_test.go b/serve/plugin_test.go index fb49d3f55e..161193744f 100644 --- a/serve/plugin_test.go +++ b/serve/plugin_test.go @@ -103,6 +103,17 @@ func TestPluginServe(t *testing.T) { if err != nil { t.Fatal(err) } + + if err := writeClient.Send(&pb.Write_Request{ + Message: &pb.Write_Request_Options{ + Options: &pb.WriteOptions{ + MigrateForce: true, + }, + }, + }); err != nil { + t.Fatal(err) + } + if err := writeClient.Send(&pb.Write_Request{ Message: &pb.Write_Request_CreateTable{ CreateTable: &pb.MessageCreateTable{ diff --git a/writers/mixed_batch_test.go b/writers/mixed_batch_test.go index b99cbcf4c0..82b5fcc8d7 100644 --- a/writers/mixed_batch_test.go +++ b/writers/mixed_batch_test.go @@ -57,8 +57,7 @@ func TestMixedBatchWriter(t *testing.T) { }, } msgCreateTable1 := plugin.MessageCreateTable{ - Table: table1, - MigrateForce: false, + Table: table1, } // message to create table2 @@ -72,8 +71,7 @@ func TestMixedBatchWriter(t *testing.T) { }, } msgCreateTable2 := plugin.MessageCreateTable{ - Table: table2, - MigrateForce: false, + Table: table2, } // message to insert into table1 From d75995f60ecfa1742df16f72258408de64c3c4e8 Mon Sep 17 00:00:00 2001 From: Herman Schaaf Date: Thu, 15 Jun 2023 11:08:28 +0100 Subject: [PATCH 026/125] Rename --- internal/memdb/memdb_test.go | 2 +- plugin/testing_write.go | 8 ++++---- plugin/testing_write_migrate.go | 25 +++++++++++++------------ 3 files changed, 18 insertions(+), 17 deletions(-) diff --git a/internal/memdb/memdb_test.go b/internal/memdb/memdb_test.go index 64ebcca892..fe240fe58c 100644 --- a/internal/memdb/memdb_test.go +++ b/internal/memdb/memdb_test.go @@ -17,7 +17,7 @@ func TestPlugin(t *testing.T) { t, p, plugin.PluginTestSuiteTests{ - NonForceMigrations: plugin.NonForceMigrations{}, + SafeMigrations: plugin.SafeMigrations{}, }, ) } diff --git a/plugin/testing_write.go b/plugin/testing_write.go index 3d8da11e4c..fd25b2bf2d 100644 --- a/plugin/testing_write.go +++ b/plugin/testing_write.go @@ -27,8 +27,8 @@ type WriterTestSuite struct { genDatOptions schema.TestSourceOptions } -// NonForceMigrations defines which migrations are supported by the plugin in non-force mode -type NonForceMigrations struct { +// SafeMigrations defines which migrations are supported by the plugin in safe migrate mode +type SafeMigrations struct { AddColumn bool AddColumnNotNull bool RemoveColumn bool @@ -50,9 +50,9 @@ type PluginTestSuiteTests struct { // SkipMigrate skips testing migration SkipMigrate bool - // NonForceMigrations defines which tests should work with force migration + // SafeMigrations defines which tests should work with force migration // and which should pass with safe migration - NonForceMigrations NonForceMigrations + SafeMigrations SafeMigrations } type NewPluginFunc func() *Plugin diff --git a/plugin/testing_write_migrate.go b/plugin/testing_write_migrate.go index 6acff5a17d..ab2fb0afc3 100644 --- a/plugin/testing_write_migrate.go +++ b/plugin/testing_write_migrate.go @@ -17,7 +17,7 @@ func tableUUIDSuffix() string { return strings.ReplaceAll(uuid.NewString(), "-", "_") } -func (s *WriterTestSuite) migrate(ctx context.Context, target *schema.Table, source *schema.Table, supportNonForce bool, writeOptionMigrateForce bool) error { +func (s *WriterTestSuite) migrate(ctx context.Context, target *schema.Table, source *schema.Table, supportsSafeMigrate bool, writeOptionMigrateForce bool) error { if err := s.plugin.writeOne(ctx, WriteOptions{ writeOptionMigrateForce, }, &MessageCreateTable{ @@ -74,7 +74,8 @@ func (s *WriterTestSuite) migrate(ctx context.Context, target *schema.Table, sou if err != nil { return fmt.Errorf("failed to sync: %w", err) } - if !writeOptionMigrateForce || supportNonForce { + // if force migration is not required, we don't expect any items to be dropped (so there should be 2 items) + if !writeOptionMigrateForce || supportsSafeMigrate { totalItems = messages.InsertItems() if totalItems != 2 { return fmt.Errorf("expected 2 item, got %d", totalItems) @@ -95,7 +96,7 @@ func (s *WriterTestSuite) testMigrate( forceMigrate bool, ) { t.Run("add_column", func(t *testing.T) { - if !forceMigrate && !s.tests.NonForceMigrations.AddColumn { + if !forceMigrate && !s.tests.SafeMigrations.AddColumn { t.Skip("skipping test: add_column") } tableName := "add_column_" + tableUUIDSuffix() @@ -113,13 +114,13 @@ func (s *WriterTestSuite) testMigrate( {Name: "bool", Type: arrow.FixedWidthTypes.Boolean}, }, } - if err := s.migrate(ctx, target, source, s.tests.NonForceMigrations.AddColumn, forceMigrate); err != nil { + if err := s.migrate(ctx, target, source, s.tests.SafeMigrations.AddColumn, forceMigrate); err != nil { t.Fatalf("failed to migrate %s: %v", tableName, err) } }) t.Run("add_column_not_null", func(t *testing.T) { - if !forceMigrate && !s.tests.NonForceMigrations.AddColumnNotNull { + if !forceMigrate && !s.tests.SafeMigrations.AddColumnNotNull { t.Skip("skipping test: add_column_not_null") } tableName := "add_column_not_null_" + tableUUIDSuffix() @@ -136,13 +137,13 @@ func (s *WriterTestSuite) testMigrate( {Name: "id", Type: types.ExtensionTypes.UUID}, {Name: "bool", Type: arrow.FixedWidthTypes.Boolean, NotNull: true}, }} - if err := s.migrate(ctx, target, source, s.tests.NonForceMigrations.AddColumnNotNull, forceMigrate); err != nil { + if err := s.migrate(ctx, target, source, s.tests.SafeMigrations.AddColumnNotNull, forceMigrate); err != nil { t.Fatalf("failed to migrate add_column_not_null: %v", err) } }) t.Run("remove_column", func(t *testing.T) { - if !forceMigrate && !s.tests.NonForceMigrations.RemoveColumn { + if !forceMigrate && !s.tests.SafeMigrations.RemoveColumn { t.Skip("skipping test: remove_column") } tableName := "remove_column_" + tableUUIDSuffix() @@ -157,13 +158,13 @@ func (s *WriterTestSuite) testMigrate( Columns: schema.ColumnList{ {Name: "id", Type: types.ExtensionTypes.UUID}, }} - if err := s.migrate(ctx, target, source, s.tests.NonForceMigrations.RemoveColumn, forceMigrate); err != nil { + if err := s.migrate(ctx, target, source, s.tests.SafeMigrations.RemoveColumn, forceMigrate); err != nil { t.Fatalf("failed to migrate remove_column: %v", err) } }) t.Run("remove_column_not_null", func(t *testing.T) { - if !forceMigrate && !s.tests.NonForceMigrations.RemoveColumnNotNull { + if !forceMigrate && !s.tests.SafeMigrations.RemoveColumnNotNull { t.Skip("skipping test: remove_column_not_null") } tableName := "remove_column_not_null_" + tableUUIDSuffix() @@ -179,13 +180,13 @@ func (s *WriterTestSuite) testMigrate( Columns: schema.ColumnList{ {Name: "id", Type: types.ExtensionTypes.UUID}, }} - if err := s.migrate(ctx, target, source, s.tests.NonForceMigrations.RemoveColumnNotNull, forceMigrate); err != nil { + if err := s.migrate(ctx, target, source, s.tests.SafeMigrations.RemoveColumnNotNull, forceMigrate); err != nil { t.Fatalf("failed to migrate remove_column_not_null: %v", err) } }) t.Run("change_column", func(t *testing.T) { - if !forceMigrate && !s.tests.NonForceMigrations.ChangeColumn { + if !forceMigrate && !s.tests.SafeMigrations.ChangeColumn { t.Skip("skipping test: change_column") } tableName := "change_column_" + tableUUIDSuffix() @@ -201,7 +202,7 @@ func (s *WriterTestSuite) testMigrate( {Name: "id", Type: types.ExtensionTypes.UUID}, {Name: "bool", Type: arrow.BinaryTypes.String, NotNull: true}, }} - if err := s.migrate(ctx, target, source, s.tests.NonForceMigrations.ChangeColumn, forceMigrate); err != nil { + if err := s.migrate(ctx, target, source, s.tests.SafeMigrations.ChangeColumn, forceMigrate); err != nil { t.Fatalf("failed to migrate change_column: %v", err) } }) From b1588315ccb53060f72557606a3454aa2c8b90fe Mon Sep 17 00:00:00 2001 From: Herman Schaaf Date: Thu, 15 Jun 2023 16:58:12 +0100 Subject: [PATCH 027/125] Add EnablePrimaryKeys --- .../servers/destination/v1/destinations.go | 5 ++ plugin/messages.go | 7 ++- plugin/plugin.go | 8 ++- plugin/plugin_writer.go | 3 +- plugin/testing_upsert.go | 12 +++- plugin/testing_write_migrate.go | 2 +- writers/batch.go | 2 +- writers/mixed_batch.go | 59 +++++++++++-------- writers/mixed_batch_test.go | 22 +++---- 9 files changed, 74 insertions(+), 46 deletions(-) diff --git a/internal/servers/destination/v1/destinations.go b/internal/servers/destination/v1/destinations.go index 50578b5a6f..ce10b3443e 100644 --- a/internal/servers/destination/v1/destinations.go +++ b/internal/servers/destination/v1/destinations.go @@ -153,7 +153,12 @@ func (s *Server) Write(msg pb.Destination_WriteServer) error { for rdr.Next() { rec := rdr.Record() rec.Retain() + table, err := schema.NewTableFromArrowSchema(rec.Schema()) + if err != nil { + return status.Errorf(codes.InvalidArgument, "failed to create table: %v", err) + } msg := &plugin.MessageInsert{ + Table: table, Record: rec, Upsert: s.spec.WriteMode == specs.WriteModeOverwrite || s.spec.WriteMode == specs.WriteModeOverwriteDeleteStale, } diff --git a/plugin/messages.go b/plugin/messages.go index ae97f3070f..d566117e0f 100644 --- a/plugin/messages.go +++ b/plugin/messages.go @@ -20,13 +20,16 @@ func (m MessageCreateTable) GetTable() *schema.Table { } type MessageInsert struct { - Table *schema.Table Record arrow.Record Upsert bool } func (m MessageInsert) GetTable() *schema.Table { - return m.Table + table, err := schema.NewTableFromArrowSchema(m.Record.Schema()) + if err != nil { + panic(err) + } + return table } // MessageDeleteStale is a pretty specific message which requires the destination to be aware of a CLI use-case diff --git a/plugin/plugin.go b/plugin/plugin.go index 0502aea231..2da0447d4c 100644 --- a/plugin/plugin.go +++ b/plugin/plugin.go @@ -9,6 +9,8 @@ import ( "github.com/rs/zerolog" ) +var ErrNotImplemented = fmt.Errorf("not implemented") + type NewClientFunc func(context.Context, zerolog.Logger, any) (Client, error) type Client interface { @@ -21,17 +23,17 @@ type Client interface { type UnimplementedWriter struct{} func (UnimplementedWriter) Write(ctx context.Context, options WriteOptions, res <-chan Message) error { - return fmt.Errorf("not implemented") + return ErrNotImplemented } type UnimplementedSync struct{} func (UnimplementedSync) Sync(ctx context.Context, options SyncOptions, res chan<- Message) error { - return fmt.Errorf("not implemented") + return ErrNotImplemented } func (UnimplementedSync) Tables(ctx context.Context) (schema.Tables, error) { - return nil, fmt.Errorf("not implemented") + return nil, ErrNotImplemented } // Plugin is the base structure required to pass to sdk.serve diff --git a/plugin/plugin_writer.go b/plugin/plugin_writer.go index 4cbf9b55df..e4eadb9437 100644 --- a/plugin/plugin_writer.go +++ b/plugin/plugin_writer.go @@ -5,7 +5,8 @@ import ( ) type WriteOptions struct { - MigrateForce bool + MigrateForce bool + EnablePrimaryKeys bool } // this function is currently used mostly for testing so it's not a public api diff --git a/plugin/testing_upsert.go b/plugin/testing_upsert.go index 55a1c0e82d..fae1aa89c4 100644 --- a/plugin/testing_upsert.go +++ b/plugin/testing_upsert.go @@ -19,7 +19,9 @@ func (s *WriterTestSuite) testUpsert(ctx context.Context) error { {Name: "name", Type: arrow.BinaryTypes.String, PrimaryKey: true}, }, } - if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageCreateTable{ + if err := s.plugin.writeOne(ctx, WriteOptions{ + EnablePrimaryKeys: true, + }, &MessageCreateTable{ Table: table, }); err != nil { return fmt.Errorf("failed to create table: %w", err) @@ -29,7 +31,9 @@ func (s *WriterTestSuite) testUpsert(ctx context.Context) error { bldr.Field(0).(*array.StringBuilder).Append("foo") record := bldr.NewRecord() - if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageInsert{ + if err := s.plugin.writeOne(ctx, WriteOptions{ + EnablePrimaryKeys: true, + }, &MessageInsert{ Record: record, Upsert: true, }); err != nil { @@ -47,7 +51,9 @@ func (s *WriterTestSuite) testUpsert(ctx context.Context) error { return fmt.Errorf("expected 1 item, got %d", totalItems) } - if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageInsert{ + if err := s.plugin.writeOne(ctx, WriteOptions{ + EnablePrimaryKeys: true, + }, &MessageInsert{ Record: record, Upsert: true, }); err != nil { diff --git a/plugin/testing_write_migrate.go b/plugin/testing_write_migrate.go index ab2fb0afc3..83f07f3855 100644 --- a/plugin/testing_write_migrate.go +++ b/plugin/testing_write_migrate.go @@ -19,7 +19,7 @@ func tableUUIDSuffix() string { func (s *WriterTestSuite) migrate(ctx context.Context, target *schema.Table, source *schema.Table, supportsSafeMigrate bool, writeOptionMigrateForce bool) error { if err := s.plugin.writeOne(ctx, WriteOptions{ - writeOptionMigrateForce, + MigrateForce: writeOptionMigrateForce, }, &MessageCreateTable{ Table: source, }); err != nil { diff --git a/writers/batch.go b/writers/batch.go index 6c67f99549..79dedff219 100644 --- a/writers/batch.go +++ b/writers/batch.go @@ -16,7 +16,7 @@ import ( ) type Writer interface { - Write(ctx context.Context, res <-chan plugin.Message) error + Write(ctx context.Context, writeOptions plugin.WriteOptions, res <-chan plugin.Message) error } const ( diff --git a/writers/mixed_batch.go b/writers/mixed_batch.go index d668706862..ae0ad509d9 100644 --- a/writers/mixed_batch.go +++ b/writers/mixed_batch.go @@ -2,6 +2,7 @@ package writers import ( "context" + "reflect" "time" "github.com/apache/arrow/go/v13/arrow/util" @@ -20,9 +21,9 @@ var allMsgTypes = []int{msgTypeCreateTable, msgTypeInsert, msgTypeDeleteStale} // MixedBatchClient is a client that will receive batches of messages with a mixture of tables. type MixedBatchClient interface { - CreateTableBatch(ctx context.Context, messages []plugin.MessageCreateTable) error - InsertBatch(ctx context.Context, messages []plugin.MessageInsert) error - DeleteStaleBatch(ctx context.Context, messages []plugin.MessageDeleteStale) error + CreateTableBatch(ctx context.Context, messages []*plugin.MessageCreateTable, options plugin.WriteOptions) error + InsertBatch(ctx context.Context, messages []*plugin.MessageInsert, options plugin.WriteOptions) error + DeleteStaleBatch(ctx context.Context, messages []*plugin.MessageDeleteStale, options plugin.WriteOptions) error } type MixedBatchWriter struct { @@ -79,30 +80,33 @@ func NewMixedBatchWriter(client MixedBatchClient, opts ...MixedBatchWriterOption func msgID(msg plugin.Message) int { switch msg.(type) { - case plugin.MessageCreateTable: + case plugin.MessageCreateTable, *plugin.MessageCreateTable: return msgTypeCreateTable - case plugin.MessageInsert: + case plugin.MessageInsert, *plugin.MessageInsert: return msgTypeInsert - case plugin.MessageDeleteStale: + case plugin.MessageDeleteStale, *plugin.MessageDeleteStale: return msgTypeDeleteStale } - panic("unknown message type") + panic("unknown message type: " + reflect.TypeOf(msg).Name()) } // Write starts listening for messages on the msgChan channel and writes them to the client in batches. -func (w *MixedBatchWriter) Write(ctx context.Context, msgChan <-chan plugin.Message) error { - createTable := &batchManager[plugin.MessageCreateTable]{ - batch: make([]plugin.MessageCreateTable, 0, w.batchSize), - writeFunc: w.client.CreateTableBatch, +func (w *MixedBatchWriter) Write(ctx context.Context, options plugin.WriteOptions, msgChan <-chan plugin.Message) error { + createTable := &batchManager[*plugin.MessageCreateTable]{ + batch: make([]*plugin.MessageCreateTable, 0, w.batchSize), + writeFunc: w.client.CreateTableBatch, + writeOptions: options, } insert := &insertBatchManager{ - batch: make([]plugin.MessageInsert, 0, w.batchSize), + batch: make([]*plugin.MessageInsert, 0, w.batchSize), writeFunc: w.client.InsertBatch, maxBatchSizeBytes: int64(w.batchSizeBytes), + writeOptions: options, } - deleteStale := &batchManager[plugin.MessageDeleteStale]{ - batch: make([]plugin.MessageDeleteStale, 0, w.batchSize), - writeFunc: w.client.DeleteStaleBatch, + deleteStale := &batchManager[*plugin.MessageDeleteStale]{ + batch: make([]*plugin.MessageDeleteStale, 0, w.batchSize), + writeFunc: w.client.DeleteStaleBatch, + writeOptions: options, } flush := func(msgType int) error { switch msgType { @@ -127,11 +131,11 @@ func (w *MixedBatchWriter) Write(ctx context.Context, msgChan <-chan plugin.Mess } prevMsgType = msgType switch v := msg.(type) { - case plugin.MessageCreateTable: + case *plugin.MessageCreateTable: err = createTable.append(ctx, v) - case plugin.MessageInsert: + case *plugin.MessageInsert: err = insert.append(ctx, v) - case plugin.MessageDeleteStale: + case *plugin.MessageDeleteStale: err = deleteStale.append(ctx, v) default: panic("unknown message type") @@ -140,13 +144,17 @@ func (w *MixedBatchWriter) Write(ctx context.Context, msgChan <-chan plugin.Mess return err } } + if prevMsgType == -1 { + return nil + } return flush(prevMsgType) } // generic batch manager for most message types type batchManager[T plugin.Message] struct { - batch []T - writeFunc func(ctx context.Context, messages []T) error + batch []T + writeFunc func(ctx context.Context, messages []T, options plugin.WriteOptions) error + writeOptions plugin.WriteOptions } func (m *batchManager[T]) append(ctx context.Context, msg T) error { @@ -164,7 +172,7 @@ func (m *batchManager[T]) flush(ctx context.Context) error { return nil } - err := m.writeFunc(ctx, m.batch) + err := m.writeFunc(ctx, m.batch, m.writeOptions) if err != nil { return err } @@ -174,13 +182,14 @@ func (m *batchManager[T]) flush(ctx context.Context) error { // special batch manager for insert messages that also keeps track of the total size of the batch type insertBatchManager struct { - batch []plugin.MessageInsert - writeFunc func(ctx context.Context, messages []plugin.MessageInsert) error + batch []*plugin.MessageInsert + writeFunc func(ctx context.Context, messages []*plugin.MessageInsert, writeOptions plugin.WriteOptions) error curBatchSizeBytes int64 maxBatchSizeBytes int64 + writeOptions plugin.WriteOptions } -func (m *insertBatchManager) append(ctx context.Context, msg plugin.MessageInsert) error { +func (m *insertBatchManager) append(ctx context.Context, msg *plugin.MessageInsert) error { if len(m.batch) == cap(m.batch) || m.curBatchSizeBytes+util.TotalRecordSize(msg.Record) > m.maxBatchSizeBytes { if err := m.flush(ctx); err != nil { return err @@ -196,7 +205,7 @@ func (m *insertBatchManager) flush(ctx context.Context) error { return nil } - err := m.writeFunc(ctx, m.batch) + err := m.writeFunc(ctx, m.batch, m.writeOptions) if err != nil { return err } diff --git a/writers/mixed_batch_test.go b/writers/mixed_batch_test.go index 82b5fcc8d7..610f0bff70 100644 --- a/writers/mixed_batch_test.go +++ b/writers/mixed_batch_test.go @@ -16,7 +16,7 @@ type testMixedBatchClient struct { receivedBatches [][]plugin.Message } -func (c *testMixedBatchClient) CreateTableBatch(ctx context.Context, msgs []plugin.MessageCreateTable) error { +func (c *testMixedBatchClient) CreateTableBatch(ctx context.Context, msgs []*plugin.MessageCreateTable, options plugin.WriteOptions) error { m := make([]plugin.Message, len(msgs)) for i, msg := range msgs { m[i] = msg @@ -25,7 +25,7 @@ func (c *testMixedBatchClient) CreateTableBatch(ctx context.Context, msgs []plug return nil } -func (c *testMixedBatchClient) InsertBatch(ctx context.Context, msgs []plugin.MessageInsert) error { +func (c *testMixedBatchClient) InsertBatch(ctx context.Context, msgs []*plugin.MessageInsert, options plugin.WriteOptions) error { m := make([]plugin.Message, len(msgs)) for i, msg := range msgs { m[i] = msg @@ -34,7 +34,7 @@ func (c *testMixedBatchClient) InsertBatch(ctx context.Context, msgs []plugin.Me return nil } -func (c *testMixedBatchClient) DeleteStaleBatch(ctx context.Context, msgs []plugin.MessageDeleteStale) error { +func (c *testMixedBatchClient) DeleteStaleBatch(ctx context.Context, msgs []*plugin.MessageDeleteStale, options plugin.WriteOptions) error { m := make([]plugin.Message, len(msgs)) for i, msg := range msgs { m[i] = msg @@ -43,6 +43,8 @@ func (c *testMixedBatchClient) DeleteStaleBatch(ctx context.Context, msgs []plug return nil } +var _ MixedBatchClient = (*testMixedBatchClient)(nil) + func TestMixedBatchWriter(t *testing.T) { ctx := context.Background() @@ -56,7 +58,7 @@ func TestMixedBatchWriter(t *testing.T) { }, }, } - msgCreateTable1 := plugin.MessageCreateTable{ + msgCreateTable1 := &plugin.MessageCreateTable{ Table: table1, } @@ -70,7 +72,7 @@ func TestMixedBatchWriter(t *testing.T) { }, }, } - msgCreateTable2 := plugin.MessageCreateTable{ + msgCreateTable2 := &plugin.MessageCreateTable{ Table: table2, } @@ -78,7 +80,7 @@ func TestMixedBatchWriter(t *testing.T) { bldr1 := array.NewRecordBuilder(memory.DefaultAllocator, table1.ToArrowSchema()) bldr1.Field(0).(*array.Int64Builder).Append(1) rec1 := bldr1.NewRecord() - msgInsertTable1 := plugin.MessageInsert{ + msgInsertTable1 := &plugin.MessageInsert{ Record: rec1, } @@ -86,18 +88,18 @@ func TestMixedBatchWriter(t *testing.T) { bldr2 := array.NewRecordBuilder(memory.DefaultAllocator, table1.ToArrowSchema()) bldr2.Field(0).(*array.Int64Builder).Append(1) rec2 := bldr2.NewRecord() - msgInsertTable2 := plugin.MessageInsert{ + msgInsertTable2 := &plugin.MessageInsert{ Record: rec2, Upsert: false, } // message to delete stale from table1 - msgDeleteStale1 := plugin.MessageDeleteStale{ + msgDeleteStale1 := &plugin.MessageDeleteStale{ Table: table1, SourceName: "my-source", SyncTime: time.Now(), } - msgDeleteStale2 := plugin.MessageDeleteStale{ + msgDeleteStale2 := &plugin.MessageDeleteStale{ Table: table1, SourceName: "my-source", SyncTime: time.Now(), @@ -177,7 +179,7 @@ func TestMixedBatchWriter(t *testing.T) { ch <- msg } close(ch) - if err := wr.Write(ctx, ch); err != nil { + if err := wr.Write(ctx, plugin.WriteOptions{}, ch); err != nil { t.Fatal(err) } if len(client.receivedBatches) != len(tc.wantBatches) { From b9b23162ef70b97b74551f6842c7da0af8835088 Mon Sep 17 00:00:00 2001 From: Herman Schaaf Date: Thu, 15 Jun 2023 16:59:42 +0100 Subject: [PATCH 028/125] Undo table change --- internal/servers/destination/v1/destinations.go | 5 ----- 1 file changed, 5 deletions(-) diff --git a/internal/servers/destination/v1/destinations.go b/internal/servers/destination/v1/destinations.go index ce10b3443e..50578b5a6f 100644 --- a/internal/servers/destination/v1/destinations.go +++ b/internal/servers/destination/v1/destinations.go @@ -153,12 +153,7 @@ func (s *Server) Write(msg pb.Destination_WriteServer) error { for rdr.Next() { rec := rdr.Record() rec.Retain() - table, err := schema.NewTableFromArrowSchema(rec.Schema()) - if err != nil { - return status.Errorf(codes.InvalidArgument, "failed to create table: %v", err) - } msg := &plugin.MessageInsert{ - Table: table, Record: rec, Upsert: s.spec.WriteMode == specs.WriteModeOverwrite || s.spec.WriteMode == specs.WriteModeOverwriteDeleteStale, } From bcbf81c35742e9861fed3b9ab547c6aea512b021 Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Fri, 16 Jun 2023 00:20:48 +0300 Subject: [PATCH 029/125] use read method instead of sync for write testing --- {internal/glob => glob}/LICENSE | 0 {internal/glob => glob}/README.md | 0 {internal/glob => glob}/glob.go | 14 ++++++++++++++ {internal/glob => glob}/glob_test.go | 0 internal/memdb/memdb.go | 11 +++++++++++ plugin/plugin.go | 6 ++++++ plugin/plugin_reader.go | 18 ++++++++++++++++- plugin/plugin_test.go | 6 ++++++ plugin/testing_upsert.go | 20 +++++++++++-------- plugin/testing_write_delete.go | 18 +++++++++-------- plugin/testing_write_insert.go | 29 +++++++++++++++++++--------- plugin/testing_write_migrate.go | 26 +++++++++++++++++-------- schema/table.go | 2 +- writers/batch.go | 13 +++++++++++++ 14 files changed, 128 insertions(+), 35 deletions(-) rename {internal/glob => glob}/LICENSE (100%) rename {internal/glob => glob}/README.md (100%) rename {internal/glob => glob}/glob.go (85%) rename {internal/glob => glob}/glob_test.go (100%) diff --git a/internal/glob/LICENSE b/glob/LICENSE similarity index 100% rename from internal/glob/LICENSE rename to glob/LICENSE diff --git a/internal/glob/README.md b/glob/README.md similarity index 100% rename from internal/glob/README.md rename to glob/README.md diff --git a/internal/glob/glob.go b/glob/glob.go similarity index 85% rename from internal/glob/glob.go rename to glob/glob.go index e67db3be18..b4fd6535db 100644 --- a/internal/glob/glob.go +++ b/glob/glob.go @@ -5,6 +5,20 @@ import "strings" // The character which is treated like a glob const GLOB = "*" +func IncludeTable(name string, tables []string, skipTables []string) bool { + for _, t := range skipTables { + if Glob(t, name) { + return false + } + } + for _, t := range tables { + if Glob(t, name) { + return true + } + } + return false +} + // Glob will test a string pattern, potentially containing globs, against a // subject string. The result is a simple true/false, determining whether or // not the glob pattern matched the subject text. diff --git a/internal/glob/glob_test.go b/glob/glob_test.go similarity index 100% rename from internal/glob/glob_test.go rename to glob/glob_test.go diff --git a/internal/memdb/memdb.go b/internal/memdb/memdb.go index ca202a82be..1b3df0e47e 100644 --- a/internal/memdb/memdb.go +++ b/internal/memdb/memdb.go @@ -84,6 +84,17 @@ func (c *client) ID() string { return "testDestinationMemDB" } +func (c *client) Read(ctx context.Context, table *schema.Table, res chan<- arrow.Record) error { + c.memoryDBLock.RLock() + defer c.memoryDBLock.RUnlock() + + tableName := table.Name + for _, row := range c.memoryDB[tableName] { + res <- row + } + return nil +} + func (c *client) Sync(ctx context.Context, options plugin.SyncOptions, res chan<- plugin.Message) error { c.memoryDBLock.RLock() diff --git a/plugin/plugin.go b/plugin/plugin.go index 2da0447d4c..818b76f158 100644 --- a/plugin/plugin.go +++ b/plugin/plugin.go @@ -5,6 +5,7 @@ import ( "fmt" "sync" + "github.com/apache/arrow/go/v13/arrow" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/rs/zerolog" ) @@ -16,6 +17,7 @@ type NewClientFunc func(context.Context, zerolog.Logger, any) (Client, error) type Client interface { Tables(ctx context.Context) (schema.Tables, error) Sync(ctx context.Context, options SyncOptions, res chan<- Message) error + Read(ctx context.Context, table *schema.Table, res chan<- arrow.Record) error Write(ctx context.Context, options WriteOptions, res <-chan Message) error Close(ctx context.Context) error } @@ -26,6 +28,10 @@ func (UnimplementedWriter) Write(ctx context.Context, options WriteOptions, res return ErrNotImplemented } +func (UnimplementedWriter) Read(ctx context.Context, table *schema.Table, res chan<- arrow.Record) error { + return fmt.Errorf("not implemented") +} + type UnimplementedSync struct{} func (UnimplementedSync) Sync(ctx context.Context, options SyncOptions, res chan<- Message) error { diff --git a/plugin/plugin_reader.go b/plugin/plugin_reader.go index 0544af3738..f6bdd4cbcc 100644 --- a/plugin/plugin_reader.go +++ b/plugin/plugin_reader.go @@ -4,7 +4,8 @@ import ( "context" "fmt" - "github.com/cloudquery/plugin-sdk/v4/internal/glob" + "github.com/apache/arrow/go/v13/arrow" + "github.com/cloudquery/plugin-sdk/v4/glob" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/cloudquery/plugin-sdk/v4/state" "github.com/rs/zerolog" @@ -59,6 +60,21 @@ func NewReadOnlyPlugin(name string, version string, newClient NewReadOnlyClientF return NewPlugin(name, version, newClientWrapper, options...) } +func (p *Plugin) readAll(ctx context.Context, table *schema.Table) ([]arrow.Record, error) { + var err error + ch := make(chan arrow.Record) + go func() { + defer close(ch) + err = p.client.Read(ctx, table, ch) + }() + // nolint:prealloc + var records []arrow.Record + for record := range ch { + records = append(records, record) + } + return records, err +} + func (p *Plugin) SyncAll(ctx context.Context, options SyncOptions) (Messages, error) { var err error ch := make(chan Message) diff --git a/plugin/plugin_test.go b/plugin/plugin_test.go index e33670b6d3..c50456139c 100644 --- a/plugin/plugin_test.go +++ b/plugin/plugin_test.go @@ -4,6 +4,7 @@ import ( "context" "testing" + "github.com/apache/arrow/go/v13/arrow" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/rs/zerolog" ) @@ -22,6 +23,11 @@ func newTestPluginClient(context.Context, zerolog.Logger, any) (Client, error) { func (c *testPluginClient) Tables(ctx context.Context) (schema.Tables, error) { return schema.Tables{}, nil } + +func (c *testPluginClient) Read(ctx context.Context, table *schema.Table, res chan<- arrow.Record) error { + return nil +} + func (c *testPluginClient) Sync(ctx context.Context, options SyncOptions, res chan<- Message) error { for _, msg := range c.messages { res <- msg diff --git a/plugin/testing_upsert.go b/plugin/testing_upsert.go index fae1aa89c4..65199178c2 100644 --- a/plugin/testing_upsert.go +++ b/plugin/testing_upsert.go @@ -40,13 +40,15 @@ func (s *WriterTestSuite) testUpsert(ctx context.Context) error { return fmt.Errorf("failed to insert record: %w", err) } - messages, err := s.plugin.SyncAll(ctx, SyncOptions{ - Tables: []string{tableName}, - }) + // messages, err := s.plugin.SyncAll(ctx, SyncOptions{ + // Tables: []string{tableName}, + // }) + records, err := s.plugin.readAll(ctx, table) if err != nil { return fmt.Errorf("failed to sync: %w", err) } - totalItems := messages.InsertItems() + // totalItems := messages.InsertItems() + totalItems := TotalRows(records) if totalItems != 1 { return fmt.Errorf("expected 1 item, got %d", totalItems) } @@ -60,14 +62,16 @@ func (s *WriterTestSuite) testUpsert(ctx context.Context) error { return fmt.Errorf("failed to insert record: %w", err) } - messages, err = s.plugin.SyncAll(ctx, SyncOptions{ - Tables: []string{tableName}, - }) + // messages, err = s.plugin.SyncAll(ctx, SyncOptions{ + // Tables: []string{tableName}, + // }) + records, err = s.plugin.readAll(ctx, table) if err != nil { return fmt.Errorf("failed to sync: %w", err) } - totalItems = messages.InsertItems() + // totalItems = messages.InsertItems() + totalItems = TotalRows(records) if totalItems != 1 { return fmt.Errorf("expected 1 item, got %d", totalItems) } diff --git a/plugin/testing_write_delete.go b/plugin/testing_write_delete.go index ad569e5baf..bfaa5fc70e 100644 --- a/plugin/testing_write_delete.go +++ b/plugin/testing_write_delete.go @@ -38,13 +38,14 @@ func (s *WriterTestSuite) testDeleteStale(ctx context.Context) error { return fmt.Errorf("failed to insert record: %w", err) } - messages, err := s.plugin.SyncAll(ctx, SyncOptions{ - Tables: []string{tableName}, - }) + // messages, err := s.plugin.SyncAll(ctx, SyncOptions{ + // Tables: []string{tableName}, + // }) + records, err := s.plugin.readAll(ctx, table) if err != nil { return fmt.Errorf("failed to sync: %w", err) } - totalItems := messages.InsertItems() + totalItems := TotalRows(records) if totalItems != 1 { return fmt.Errorf("expected 1 items, got %d", totalItems) @@ -62,13 +63,14 @@ func (s *WriterTestSuite) testDeleteStale(ctx context.Context) error { return fmt.Errorf("failed to delete stale records: %w", err) } - messages, err = s.plugin.SyncAll(ctx, SyncOptions{ - Tables: []string{tableName}, - }) + // messages, err = s.plugin.SyncAll(ctx, SyncOptions{ + // Tables: []string{tableName}, + // }) + records, err = s.plugin.readAll(ctx, table) if err != nil { return fmt.Errorf("failed to sync: %w", err) } - totalItems = messages.InsertItems() + totalItems = TotalRows(records) if totalItems != 1 { return fmt.Errorf("expected 1 item, got %d", totalItems) diff --git a/plugin/testing_write_insert.go b/plugin/testing_write_insert.go index 36004ac173..15f09d161e 100644 --- a/plugin/testing_write_insert.go +++ b/plugin/testing_write_insert.go @@ -11,6 +11,14 @@ import ( "github.com/cloudquery/plugin-sdk/v4/schema" ) +func TotalRows(records []arrow.Record) int64 { + totalRows := int64(0) + for _, record := range records { + totalRows += record.NumRows() + } + return totalRows +} + func (s *WriterTestSuite) testInsert(ctx context.Context) error { tableName := fmt.Sprintf("cq_test_insert_%d", time.Now().Unix()) table := &schema.Table{ @@ -35,14 +43,16 @@ func (s *WriterTestSuite) testInsert(ctx context.Context) error { }); err != nil { return fmt.Errorf("failed to insert record: %w", err) } - - messages, err := s.plugin.SyncAll(ctx, SyncOptions{ - Tables: []string{tableName}, - }) + readRecords, err := s.plugin.readAll(ctx, table) + // messages, err := s.plugin.SyncAll(ctx, SyncOptions{ + // Tables: []string{tableName}, + // }) if err != nil { return fmt.Errorf("failed to sync: %w", err) } - totalItems := messages.InsertItems() + + + totalItems := TotalRows(readRecords) if totalItems != 1 { return fmt.Errorf("expected 1 item, got %d", totalItems) } @@ -53,14 +63,15 @@ func (s *WriterTestSuite) testInsert(ctx context.Context) error { return fmt.Errorf("failed to insert record: %w", err) } - messages, err = s.plugin.SyncAll(ctx, SyncOptions{ - Tables: []string{tableName}, - }) + readRecords, err = s.plugin.readAll(ctx, table) + // messages, err = s.plugin.SyncAll(ctx, SyncOptions{ + // Tables: []string{tableName}, + // }) if err != nil { return fmt.Errorf("failed to sync: %w", err) } - totalItems = messages.InsertItems() + totalItems = TotalRows(readRecords) if totalItems != 2 { return fmt.Errorf("expected 2 item, got %d", totalItems) } diff --git a/plugin/testing_write_migrate.go b/plugin/testing_write_migrate.go index 83f07f3855..eb5b9c80d2 100644 --- a/plugin/testing_write_migrate.go +++ b/plugin/testing_write_migrate.go @@ -45,13 +45,15 @@ func (s *WriterTestSuite) migrate(ctx context.Context, target *schema.Table, sou return fmt.Errorf("failed to insert record: %w", err) } - messages, err := s.plugin.SyncAll(ctx, SyncOptions{ - Tables: []string{source.Name}, - }) + // messages, err := s.plugin.SyncAll(ctx, SyncOptions{ + // Tables: []string{source.Name}, + // }) + records, err := s.plugin.readAll(ctx, source) if err != nil { return fmt.Errorf("failed to sync: %w", err) } - totalItems := messages.InsertItems() + // totalItems := messages.InsertItems() + totalItems := TotalRows(records) if totalItems != 1 { return fmt.Errorf("expected 1 item, got %d", totalItems) } @@ -68,20 +70,28 @@ func (s *WriterTestSuite) migrate(ctx context.Context, target *schema.Table, sou return fmt.Errorf("failed to insert record: %w", err) } - messages, err = s.plugin.SyncAll(ctx, SyncOptions{ - Tables: []string{source.Name}, - }) + // messages, err = s.plugin.SyncAll(ctx, SyncOptions{ + // Tables: []string{source.Name}, + // }) + records, err = s.plugin.readAll(ctx, source) if err != nil { return fmt.Errorf("failed to sync: %w", err) } +<<<<<<< HEAD // if force migration is not required, we don't expect any items to be dropped (so there should be 2 items) if !writeOptionMigrateForce || supportsSafeMigrate { totalItems = messages.InsertItems() +======= + if !writeOptionMigrateForce || supportNonForce { + // totalItems = messages.InsertItems() + totalItems = TotalRows(records) +>>>>>>> a0daa22 (use read method instead of sync for write testing) if totalItems != 2 { return fmt.Errorf("expected 2 item, got %d", totalItems) } } else { - totalItems = messages.InsertItems() + // totalItems = messages.InsertItems() + totalItems = TotalRows(records) if totalItems != 1 { return fmt.Errorf("expected 1 item, got %d", totalItems) } diff --git a/schema/table.go b/schema/table.go index 9e84f637b1..76a7384650 100644 --- a/schema/table.go +++ b/schema/table.go @@ -8,7 +8,7 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/ipc" - "github.com/cloudquery/plugin-sdk/v4/internal/glob" + "github.com/cloudquery/plugin-sdk/v4/glob" "golang.org/x/exp/slices" ) diff --git a/writers/batch.go b/writers/batch.go index 79dedff219..9ece93efc7 100644 --- a/writers/batch.go +++ b/writers/batch.go @@ -106,6 +106,19 @@ func NewBatchWriter(client BatchWriterClient, opts ...Option) (*BatchWriter, err return c, nil } +func (w *BatchWriter) Flush(ctx context.Context) error { + w.workersLock.RLock() + for _, worker := range w.workers { + done := make(chan bool) + worker.flush <- done + <-done + } + w.workersLock.RUnlock() + w.flushCreateTables(ctx) + w.flushDeleteStaleTables(ctx) + return nil +} + func (w *BatchWriter) Close(ctx context.Context) error { w.workersLock.Lock() defer w.workersLock.Unlock() From 31464e9e4582d1c4d2c1a6579f86df17d874c799 Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Fri, 16 Jun 2023 00:22:55 +0300 Subject: [PATCH 030/125] rebase complete --- plugin/testing_write_migrate.go | 6 ------ 1 file changed, 6 deletions(-) diff --git a/plugin/testing_write_migrate.go b/plugin/testing_write_migrate.go index eb5b9c80d2..168f7fa3e2 100644 --- a/plugin/testing_write_migrate.go +++ b/plugin/testing_write_migrate.go @@ -77,15 +77,9 @@ func (s *WriterTestSuite) migrate(ctx context.Context, target *schema.Table, sou if err != nil { return fmt.Errorf("failed to sync: %w", err) } -<<<<<<< HEAD // if force migration is not required, we don't expect any items to be dropped (so there should be 2 items) if !writeOptionMigrateForce || supportsSafeMigrate { - totalItems = messages.InsertItems() -======= - if !writeOptionMigrateForce || supportNonForce { - // totalItems = messages.InsertItems() totalItems = TotalRows(records) ->>>>>>> a0daa22 (use read method instead of sync for write testing) if totalItems != 2 { return fmt.Errorf("expected 2 item, got %d", totalItems) } From 934cfcdb5a6eb6b5d94df90ec8c2350d9b54cccf Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Fri, 16 Jun 2023 00:33:46 +0300 Subject: [PATCH 031/125] nits --- plugin/testing_upsert.go | 8 -------- plugin/testing_write_delete.go | 7 ------- plugin/testing_write_insert.go | 6 ------ plugin/testing_write_migrate.go | 8 -------- 4 files changed, 29 deletions(-) diff --git a/plugin/testing_upsert.go b/plugin/testing_upsert.go index 65199178c2..3279064382 100644 --- a/plugin/testing_upsert.go +++ b/plugin/testing_upsert.go @@ -40,14 +40,10 @@ func (s *WriterTestSuite) testUpsert(ctx context.Context) error { return fmt.Errorf("failed to insert record: %w", err) } - // messages, err := s.plugin.SyncAll(ctx, SyncOptions{ - // Tables: []string{tableName}, - // }) records, err := s.plugin.readAll(ctx, table) if err != nil { return fmt.Errorf("failed to sync: %w", err) } - // totalItems := messages.InsertItems() totalItems := TotalRows(records) if totalItems != 1 { return fmt.Errorf("expected 1 item, got %d", totalItems) @@ -62,15 +58,11 @@ func (s *WriterTestSuite) testUpsert(ctx context.Context) error { return fmt.Errorf("failed to insert record: %w", err) } - // messages, err = s.plugin.SyncAll(ctx, SyncOptions{ - // Tables: []string{tableName}, - // }) records, err = s.plugin.readAll(ctx, table) if err != nil { return fmt.Errorf("failed to sync: %w", err) } - // totalItems = messages.InsertItems() totalItems = TotalRows(records) if totalItems != 1 { return fmt.Errorf("expected 1 item, got %d", totalItems) diff --git a/plugin/testing_write_delete.go b/plugin/testing_write_delete.go index bfaa5fc70e..86606b7286 100644 --- a/plugin/testing_write_delete.go +++ b/plugin/testing_write_delete.go @@ -8,7 +8,6 @@ import ( "github.com/apache/arrow/go/v13/arrow/array" "github.com/apache/arrow/go/v13/arrow/memory" "github.com/cloudquery/plugin-sdk/v4/schema" - // "github.com/cloudquery/plugin-sdk/v4/types" ) func (s *WriterTestSuite) testDeleteStale(ctx context.Context) error { @@ -38,9 +37,6 @@ func (s *WriterTestSuite) testDeleteStale(ctx context.Context) error { return fmt.Errorf("failed to insert record: %w", err) } - // messages, err := s.plugin.SyncAll(ctx, SyncOptions{ - // Tables: []string{tableName}, - // }) records, err := s.plugin.readAll(ctx, table) if err != nil { return fmt.Errorf("failed to sync: %w", err) @@ -63,9 +59,6 @@ func (s *WriterTestSuite) testDeleteStale(ctx context.Context) error { return fmt.Errorf("failed to delete stale records: %w", err) } - // messages, err = s.plugin.SyncAll(ctx, SyncOptions{ - // Tables: []string{tableName}, - // }) records, err = s.plugin.readAll(ctx, table) if err != nil { return fmt.Errorf("failed to sync: %w", err) diff --git a/plugin/testing_write_insert.go b/plugin/testing_write_insert.go index 15f09d161e..e1b2e08be6 100644 --- a/plugin/testing_write_insert.go +++ b/plugin/testing_write_insert.go @@ -44,9 +44,6 @@ func (s *WriterTestSuite) testInsert(ctx context.Context) error { return fmt.Errorf("failed to insert record: %w", err) } readRecords, err := s.plugin.readAll(ctx, table) - // messages, err := s.plugin.SyncAll(ctx, SyncOptions{ - // Tables: []string{tableName}, - // }) if err != nil { return fmt.Errorf("failed to sync: %w", err) } @@ -64,9 +61,6 @@ func (s *WriterTestSuite) testInsert(ctx context.Context) error { } readRecords, err = s.plugin.readAll(ctx, table) - // messages, err = s.plugin.SyncAll(ctx, SyncOptions{ - // Tables: []string{tableName}, - // }) if err != nil { return fmt.Errorf("failed to sync: %w", err) } diff --git a/plugin/testing_write_migrate.go b/plugin/testing_write_migrate.go index 168f7fa3e2..617f1946e7 100644 --- a/plugin/testing_write_migrate.go +++ b/plugin/testing_write_migrate.go @@ -45,14 +45,10 @@ func (s *WriterTestSuite) migrate(ctx context.Context, target *schema.Table, sou return fmt.Errorf("failed to insert record: %w", err) } - // messages, err := s.plugin.SyncAll(ctx, SyncOptions{ - // Tables: []string{source.Name}, - // }) records, err := s.plugin.readAll(ctx, source) if err != nil { return fmt.Errorf("failed to sync: %w", err) } - // totalItems := messages.InsertItems() totalItems := TotalRows(records) if totalItems != 1 { return fmt.Errorf("expected 1 item, got %d", totalItems) @@ -70,9 +66,6 @@ func (s *WriterTestSuite) migrate(ctx context.Context, target *schema.Table, sou return fmt.Errorf("failed to insert record: %w", err) } - // messages, err = s.plugin.SyncAll(ctx, SyncOptions{ - // Tables: []string{source.Name}, - // }) records, err = s.plugin.readAll(ctx, source) if err != nil { return fmt.Errorf("failed to sync: %w", err) @@ -84,7 +77,6 @@ func (s *WriterTestSuite) migrate(ctx context.Context, target *schema.Table, sou return fmt.Errorf("expected 2 item, got %d", totalItems) } } else { - // totalItems = messages.InsertItems() totalItems = TotalRows(records) if totalItems != 1 { return fmt.Errorf("expected 1 item, got %d", totalItems) From 20342b488d8b8f8b8db1a02c3e004012ab4edc34 Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Fri, 16 Jun 2023 11:07:03 +0300 Subject: [PATCH 032/125] fix some more tests --- .gitignore | 3 ++- serve/{docs_test.go.backup => docs_test.go} | 6 ++++-- 2 files changed, 6 insertions(+), 3 deletions(-) rename serve/{docs_test.go.backup => docs_test.go} (75%) diff --git a/.gitignore b/.gitignore index d15ff8fe72..605ca47ead 100644 --- a/.gitignore +++ b/.gitignore @@ -8,4 +8,5 @@ config.hcl vendor cover.out .delta.* -bench.json \ No newline at end of file +bench.json +serve/^TestPluginDocs$/ \ No newline at end of file diff --git a/serve/docs_test.go.backup b/serve/docs_test.go similarity index 75% rename from serve/docs_test.go.backup rename to serve/docs_test.go index 8b5b5b8abb..1548e0b1c1 100644 --- a/serve/docs_test.go.backup +++ b/serve/docs_test.go @@ -17,8 +17,10 @@ func TestPluginDocs(t *testing.T) { if err := p.Init(context.Background(), nil); err != nil { t.Fatal(err) } - srv := Plugin(p, WithArgs("doc", tmpDir), WithTestListener()) - if err := srv.newCmdPluginDoc().Execute(); err != nil { + srv := Plugin(p) + cmd := srv.newCmdPluginRoot() + cmd.SetArgs([]string{"doc", tmpDir}) + if err := cmd.Execute(); err != nil { t.Fatal(err) } } From 7a59ca783cd07923d86fff2ae8e41d187ef71a85 Mon Sep 17 00:00:00 2001 From: Herman Schaaf Date: Fri, 16 Jun 2023 10:54:20 +0100 Subject: [PATCH 033/125] Rename interfaces to Source/Destination, couple of testing bugfixes --- plugin/plugin.go | 19 +++++----- ...plugin_writer.go => plugin_destination.go} | 16 ++++++--- plugin/{plugin_reader.go => plugin_source.go} | 18 +++++----- plugin/testing_upsert.go | 14 +++----- plugin/testing_write.go | 5 --- plugin/testing_write_migrate.go | 35 ++++++++++--------- 6 files changed, 52 insertions(+), 55 deletions(-) rename plugin/{plugin_writer.go => plugin_destination.go} (58%) rename plugin/{plugin_reader.go => plugin_source.go} (83%) diff --git a/plugin/plugin.go b/plugin/plugin.go index 818b76f158..ab79cc633a 100644 --- a/plugin/plugin.go +++ b/plugin/plugin.go @@ -15,30 +15,27 @@ var ErrNotImplemented = fmt.Errorf("not implemented") type NewClientFunc func(context.Context, zerolog.Logger, any) (Client, error) type Client interface { - Tables(ctx context.Context) (schema.Tables, error) - Sync(ctx context.Context, options SyncOptions, res chan<- Message) error - Read(ctx context.Context, table *schema.Table, res chan<- arrow.Record) error - Write(ctx context.Context, options WriteOptions, res <-chan Message) error - Close(ctx context.Context) error + SourceClient + DestinationClient } -type UnimplementedWriter struct{} +type UnimplementedDestination struct{} -func (UnimplementedWriter) Write(ctx context.Context, options WriteOptions, res <-chan Message) error { +func (UnimplementedDestination) Write(ctx context.Context, options WriteOptions, res <-chan Message) error { return ErrNotImplemented } -func (UnimplementedWriter) Read(ctx context.Context, table *schema.Table, res chan<- arrow.Record) error { +func (UnimplementedDestination) Read(ctx context.Context, table *schema.Table, res chan<- arrow.Record) error { return fmt.Errorf("not implemented") } -type UnimplementedSync struct{} +type UnimplementedSource struct{} -func (UnimplementedSync) Sync(ctx context.Context, options SyncOptions, res chan<- Message) error { +func (UnimplementedSource) Sync(ctx context.Context, options SyncOptions, res chan<- Message) error { return ErrNotImplemented } -func (UnimplementedSync) Tables(ctx context.Context) (schema.Tables, error) { +func (UnimplementedSource) Tables(ctx context.Context) (schema.Tables, error) { return nil, ErrNotImplemented } diff --git a/plugin/plugin_writer.go b/plugin/plugin_destination.go similarity index 58% rename from plugin/plugin_writer.go rename to plugin/plugin_destination.go index e4eadb9437..123bdf717f 100644 --- a/plugin/plugin_writer.go +++ b/plugin/plugin_destination.go @@ -2,20 +2,28 @@ package plugin import ( "context" + + "github.com/apache/arrow/go/v13/arrow" + "github.com/cloudquery/plugin-sdk/v4/schema" ) type WriteOptions struct { - MigrateForce bool - EnablePrimaryKeys bool + MigrateForce bool +} + +type DestinationClient interface { + Read(ctx context.Context, table *schema.Table, res chan<- arrow.Record) error + Write(ctx context.Context, options WriteOptions, res <-chan Message) error + Close(ctx context.Context) error } -// this function is currently used mostly for testing so it's not a public api +// writeOne is currently used mostly for testing, so it's not a public api func (p *Plugin) writeOne(ctx context.Context, options WriteOptions, resource Message) error { resources := []Message{resource} return p.WriteAll(ctx, options, resources) } -// this function is currently used mostly for testing so it's not a public api +// WriteAll is currently used mostly for testing, so it's not a public api func (p *Plugin) WriteAll(ctx context.Context, options WriteOptions, resources []Message) error { ch := make(chan Message, len(resources)) for _, resource := range resources { diff --git a/plugin/plugin_reader.go b/plugin/plugin_source.go similarity index 83% rename from plugin/plugin_reader.go rename to plugin/plugin_source.go index f6bdd4cbcc..52d41243c4 100644 --- a/plugin/plugin_reader.go +++ b/plugin/plugin_source.go @@ -19,7 +19,7 @@ type SyncOptions struct { StateBackend state.Client } -type ReadOnlyClient interface { +type SourceClient interface { Tables(ctx context.Context) (schema.Tables, error) Sync(ctx context.Context, options SyncOptions, res chan<- Message) error Close(ctx context.Context) error @@ -39,21 +39,21 @@ func IsTable(name string, includeTablesPattern []string, skipTablesPattern []str return false } -type NewReadOnlyClientFunc func(context.Context, zerolog.Logger, any) (ReadOnlyClient, error) +type NewReadOnlyClientFunc func(context.Context, zerolog.Logger, any) (SourceClient, error) -// NewReadOnlyPlugin returns a new CloudQuery Plugin with the given name, version and implementation. -// this plugin will only support read operations. For ReadWrite plugin use NewPlugin. -func NewReadOnlyPlugin(name string, version string, newClient NewReadOnlyClientFunc, options ...Option) *Plugin { +// NewSourcePlugin returns a new CloudQuery Plugin with the given name, version and implementation. +// Source plugins only support read operations. For Read & Write plugin use NewPlugin. +func NewSourcePlugin(name string, version string, newClient NewReadOnlyClientFunc, options ...Option) *Plugin { newClientWrapper := func(ctx context.Context, logger zerolog.Logger, any any) (Client, error) { - readOnlyClient, err := newClient(ctx, logger, any) + sourceClient, err := newClient(ctx, logger, any) if err != nil { return nil, err } wrapperClient := struct { - ReadOnlyClient - UnimplementedWriter + SourceClient + UnimplementedDestination }{ - ReadOnlyClient: readOnlyClient, + SourceClient: sourceClient, } return wrapperClient, nil } diff --git a/plugin/testing_upsert.go b/plugin/testing_upsert.go index 3279064382..3bacf4474a 100644 --- a/plugin/testing_upsert.go +++ b/plugin/testing_upsert.go @@ -19,9 +19,7 @@ func (s *WriterTestSuite) testUpsert(ctx context.Context) error { {Name: "name", Type: arrow.BinaryTypes.String, PrimaryKey: true}, }, } - if err := s.plugin.writeOne(ctx, WriteOptions{ - EnablePrimaryKeys: true, - }, &MessageCreateTable{ + if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageCreateTable{ Table: table, }); err != nil { return fmt.Errorf("failed to create table: %w", err) @@ -31,9 +29,7 @@ func (s *WriterTestSuite) testUpsert(ctx context.Context) error { bldr.Field(0).(*array.StringBuilder).Append("foo") record := bldr.NewRecord() - if err := s.plugin.writeOne(ctx, WriteOptions{ - EnablePrimaryKeys: true, - }, &MessageInsert{ + if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageInsert{ Record: record, Upsert: true, }); err != nil { @@ -42,16 +38,14 @@ func (s *WriterTestSuite) testUpsert(ctx context.Context) error { records, err := s.plugin.readAll(ctx, table) if err != nil { - return fmt.Errorf("failed to sync: %w", err) + return fmt.Errorf("failed to readAll: %w", err) } totalItems := TotalRows(records) if totalItems != 1 { return fmt.Errorf("expected 1 item, got %d", totalItems) } - if err := s.plugin.writeOne(ctx, WriteOptions{ - EnablePrimaryKeys: true, - }, &MessageInsert{ + if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageInsert{ Record: record, Upsert: true, }); err != nil { diff --git a/plugin/testing_write.go b/plugin/testing_write.go index fd25b2bf2d..2dd9c4b074 100644 --- a/plugin/testing_write.go +++ b/plugin/testing_write.go @@ -76,7 +76,6 @@ func WithTestDataOptions(opts schema.TestSourceOptions) func(o *WriterTestSuite) } func TestWriterSuiteRunner(t *testing.T, p *Plugin, tests PluginTestSuiteTests, opts ...func(o *WriterTestSuite)) { - t.Helper() suite := &WriterTestSuite{ tests: tests, plugin: p, @@ -89,7 +88,6 @@ func TestWriterSuiteRunner(t *testing.T, p *Plugin, tests PluginTestSuiteTests, ctx := context.Background() t.Run("TestUpsert", func(t *testing.T) { - t.Helper() if suite.tests.SkipUpsert { t.Skip("skipping " + t.Name()) } @@ -99,7 +97,6 @@ func TestWriterSuiteRunner(t *testing.T, p *Plugin, tests PluginTestSuiteTests, }) t.Run("TestInsert", func(t *testing.T) { - t.Helper() if suite.tests.SkipInsert { t.Skip("skipping " + t.Name()) } @@ -109,7 +106,6 @@ func TestWriterSuiteRunner(t *testing.T, p *Plugin, tests PluginTestSuiteTests, }) t.Run("TestDeleteStale", func(t *testing.T) { - t.Helper() if suite.tests.SkipDeleteStale { t.Skip("skipping " + t.Name()) } @@ -119,7 +115,6 @@ func TestWriterSuiteRunner(t *testing.T, p *Plugin, tests PluginTestSuiteTests, }) t.Run("TestMigrate", func(t *testing.T) { - t.Helper() if suite.tests.SkipMigrate { t.Skip("skipping " + t.Name()) } diff --git a/plugin/testing_write_migrate.go b/plugin/testing_write_migrate.go index 617f1946e7..56aebe956a 100644 --- a/plugin/testing_write_migrate.go +++ b/plugin/testing_write_migrate.go @@ -37,9 +37,7 @@ func (s *WriterTestSuite) migrate(ctx context.Context, target *schema.Table, sou resource1 := schema.GenTestData(source, opts)[0] - if err := s.plugin.writeOne(ctx, WriteOptions{ - MigrateForce: writeOptionMigrateForce, - }, &MessageInsert{ + if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageInsert{ Record: resource1, }); err != nil { return fmt.Errorf("failed to insert record: %w", err) @@ -60,15 +58,16 @@ func (s *WriterTestSuite) migrate(ctx context.Context, target *schema.Table, sou return fmt.Errorf("failed to create table: %w", err) } + resource2 := schema.GenTestData(target, opts)[0] if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageInsert{ - Record: resource1, + Record: resource2, }); err != nil { return fmt.Errorf("failed to insert record: %w", err) } - records, err = s.plugin.readAll(ctx, source) + records, err = s.plugin.readAll(ctx, target) if err != nil { - return fmt.Errorf("failed to sync: %w", err) + return fmt.Errorf("failed to readAll: %w", err) } // if force migration is not required, we don't expect any items to be dropped (so there should be 2 items) if !writeOptionMigrateForce || supportsSafeMigrate { @@ -91,11 +90,15 @@ func (s *WriterTestSuite) testMigrate( t *testing.T, forceMigrate bool, ) { - t.Run("add_column", func(t *testing.T) { + suffix := "_safe" + if forceMigrate { + suffix = "_force" + } + t.Run("add_column"+suffix, func(t *testing.T) { if !forceMigrate && !s.tests.SafeMigrations.AddColumn { t.Skip("skipping test: add_column") } - tableName := "add_column_" + tableUUIDSuffix() + tableName := "add_column" + suffix + "_" + tableUUIDSuffix() source := &schema.Table{ Name: tableName, Columns: schema.ColumnList{ @@ -115,11 +118,11 @@ func (s *WriterTestSuite) testMigrate( } }) - t.Run("add_column_not_null", func(t *testing.T) { + t.Run("add_column_not_null"+suffix, func(t *testing.T) { if !forceMigrate && !s.tests.SafeMigrations.AddColumnNotNull { t.Skip("skipping test: add_column_not_null") } - tableName := "add_column_not_null_" + tableUUIDSuffix() + tableName := "add_column_not_null" + suffix + "_" + tableUUIDSuffix() source := &schema.Table{ Name: tableName, Columns: schema.ColumnList{ @@ -138,11 +141,11 @@ func (s *WriterTestSuite) testMigrate( } }) - t.Run("remove_column", func(t *testing.T) { + t.Run("remove_column"+suffix, func(t *testing.T) { if !forceMigrate && !s.tests.SafeMigrations.RemoveColumn { t.Skip("skipping test: remove_column") } - tableName := "remove_column_" + tableUUIDSuffix() + tableName := "remove_column" + suffix + "_" + tableUUIDSuffix() source := &schema.Table{ Name: tableName, Columns: schema.ColumnList{ @@ -159,11 +162,11 @@ func (s *WriterTestSuite) testMigrate( } }) - t.Run("remove_column_not_null", func(t *testing.T) { + t.Run("remove_column_not_null"+suffix, func(t *testing.T) { if !forceMigrate && !s.tests.SafeMigrations.RemoveColumnNotNull { t.Skip("skipping test: remove_column_not_null") } - tableName := "remove_column_not_null_" + tableUUIDSuffix() + tableName := "remove_column_not_null" + suffix + "_" + tableUUIDSuffix() source := &schema.Table{ Name: tableName, Columns: schema.ColumnList{ @@ -181,11 +184,11 @@ func (s *WriterTestSuite) testMigrate( } }) - t.Run("change_column", func(t *testing.T) { + t.Run("change_column"+suffix, func(t *testing.T) { if !forceMigrate && !s.tests.SafeMigrations.ChangeColumn { t.Skip("skipping test: change_column") } - tableName := "change_column_" + tableUUIDSuffix() + tableName := "change_column" + suffix + "_" + tableUUIDSuffix() source := &schema.Table{ Name: tableName, Columns: schema.ColumnList{ From b0dff8565f3217d9b73fbcb8cf9715b77440cbef Mon Sep 17 00:00:00 2001 From: Herman Schaaf Date: Fri, 16 Jun 2023 11:28:24 +0100 Subject: [PATCH 034/125] Use only 8 chars of uuid --- plugin/testing_write_migrate.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/plugin/testing_write_migrate.go b/plugin/testing_write_migrate.go index 56aebe956a..1cb67f225c 100644 --- a/plugin/testing_write_migrate.go +++ b/plugin/testing_write_migrate.go @@ -14,7 +14,7 @@ import ( ) func tableUUIDSuffix() string { - return strings.ReplaceAll(uuid.NewString(), "-", "_") + return strings.ReplaceAll(uuid.NewString(), "-", "_")[:8] // use only first 8 chars } func (s *WriterTestSuite) migrate(ctx context.Context, target *schema.Table, source *schema.Table, supportsSafeMigrate bool, writeOptionMigrateForce bool) error { @@ -40,7 +40,7 @@ func (s *WriterTestSuite) migrate(ctx context.Context, target *schema.Table, sou if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageInsert{ Record: resource1, }); err != nil { - return fmt.Errorf("failed to insert record: %w", err) + return fmt.Errorf("failed to insert first record: %w", err) } records, err := s.plugin.readAll(ctx, source) @@ -62,7 +62,7 @@ func (s *WriterTestSuite) migrate(ctx context.Context, target *schema.Table, sou if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageInsert{ Record: resource2, }); err != nil { - return fmt.Errorf("failed to insert record: %w", err) + return fmt.Errorf("failed to insert second record: %w", err) } records, err = s.plugin.readAll(ctx, target) From 2ce96f09598a81add27b9e6d0742234d90f920ac Mon Sep 17 00:00:00 2001 From: Herman Schaaf Date: Fri, 16 Jun 2023 13:33:24 +0100 Subject: [PATCH 035/125] Rename to MigrateTable --- internal/memdb/memdb.go | 2 +- .../servers/destination/v0/destinations.go | 4 +- .../servers/destination/v1/destinations.go | 4 +- internal/servers/plugin/v3/plugin.go | 12 +++--- internal/servers/plugin/v3/state.go | 4 +- plugin/messages.go | 8 ++-- plugin/plugin_test.go | 2 +- plugin/testing_upsert.go | 2 +- plugin/testing_write_delete.go | 2 +- plugin/testing_write_insert.go | 3 +- plugin/testing_write_migrate.go | 4 +- serve/plugin_test.go | 4 +- writers/batch.go | 42 +++++++++---------- writers/batch_test.go | 22 +++++----- writers/mixed_batch.go | 24 +++++------ writers/mixed_batch_test.go | 26 ++++++------ 16 files changed, 82 insertions(+), 83 deletions(-) diff --git a/internal/memdb/memdb.go b/internal/memdb/memdb.go index 1b3df0e47e..00c76b4ae4 100644 --- a/internal/memdb/memdb.go +++ b/internal/memdb/memdb.go @@ -155,7 +155,7 @@ func (c *client) Write(ctx context.Context, options plugin.WriteOptions, msgs <- c.memoryDBLock.Lock() switch msg := msg.(type) { - case *plugin.MessageCreateTable: + case *plugin.MessageMigrateTable: c.migrate(ctx, msg.Table) case *plugin.MessageDeleteStale: c.deleteStale(ctx, msg) diff --git a/internal/servers/destination/v0/destinations.go b/internal/servers/destination/v0/destinations.go index d8c5f85c9d..905897889f 100644 --- a/internal/servers/destination/v0/destinations.go +++ b/internal/servers/destination/v0/destinations.go @@ -70,7 +70,7 @@ func (s *Server) Migrate(ctx context.Context, req *pb.Migrate_Request) (*pb.Migr }, writeCh) }) for _, table := range tables { - writeCh <- &plugin.MessageCreateTable{ + writeCh <- &plugin.MessageMigrateTable{ Table: table, } } @@ -125,7 +125,7 @@ func (s *Server) Write2(msg pb.Destination_Write2Server) error { }) for _, table := range tables { - msgs <- &plugin.MessageCreateTable{ + msgs <- &plugin.MessageMigrateTable{ Table: table, } } diff --git a/internal/servers/destination/v1/destinations.go b/internal/servers/destination/v1/destinations.go index 50578b5a6f..f213747ea5 100644 --- a/internal/servers/destination/v1/destinations.go +++ b/internal/servers/destination/v1/destinations.go @@ -68,7 +68,7 @@ func (s *Server) Migrate(ctx context.Context, req *pb.Migrate_Request) (*pb.Migr }, writeCh) }) for _, table := range tables { - writeCh <- &plugin.MessageCreateTable{ + writeCh <- &plugin.MessageMigrateTable{ Table: table, } } @@ -121,7 +121,7 @@ func (s *Server) Write(msg pb.Destination_WriteServer) error { }) for _, table := range tables { - msgs <- &plugin.MessageCreateTable{ + msgs <- &plugin.MessageMigrateTable{ Table: table, } } diff --git a/internal/servers/plugin/v3/plugin.go b/internal/servers/plugin/v3/plugin.go index 314475e8fb..defbc83be1 100644 --- a/internal/servers/plugin/v3/plugin.go +++ b/internal/servers/plugin/v3/plugin.go @@ -108,10 +108,10 @@ func (s *Server) Sync(req *pb.Sync_Request, stream pb.Plugin_SyncServer) error { pbMsg := &pb.Sync_Response{} for msg := range msgs { switch m := msg.(type) { - case *plugin.MessageCreateTable: + case *plugin.MessageMigrateTable: m.Table.ToArrowSchema() - pbMsg.Message = &pb.Sync_Response_CreateTable{ - CreateTable: &pb.MessageCreateTable{ + pbMsg.Message = &pb.Sync_Response_MigrateTable{ + MigrateTable: &pb.MessageMigrateTable{ Table: nil, }, } @@ -195,13 +195,13 @@ func (s *Server) Write(msg pb.Plugin_WriteServer) error { var pluginMessage plugin.Message var pbMsgConvertErr error switch pbMsg := r.Message.(type) { - case *pb.Write_Request_CreateTable: - table, err := schema.NewTableFromBytes(pbMsg.CreateTable.Table) + case *pb.Write_Request_MigrateTable: + table, err := schema.NewTableFromBytes(pbMsg.MigrateTable.Table) if err != nil { pbMsgConvertErr = status.Errorf(codes.InvalidArgument, "failed to create table: %v", err) break } - pluginMessage = &plugin.MessageCreateTable{ + pluginMessage = &plugin.MessageMigrateTable{ Table: table, } case *pb.Write_Request_Insert: diff --git a/internal/servers/plugin/v3/state.go b/internal/servers/plugin/v3/state.go index 81fd753a5c..f7a9015433 100644 --- a/internal/servers/plugin/v3/state.go +++ b/internal/servers/plugin/v3/state.go @@ -77,8 +77,8 @@ func newStateClient(ctx context.Context, conn *grpc.ClientConn, spec *pbPlugin.S } if err := writeClient.Send(&pbPlugin.Write_Request{ - Message: &pbPlugin.Write_Request_CreateTable{ - CreateTable: &pbPlugin.MessageCreateTable{ + Message: &pbPlugin.Write_Request_MigrateTable{ + MigrateTable: &pbPlugin.MessageMigrateTable{ Table: tableBytes, }, }, diff --git a/plugin/messages.go b/plugin/messages.go index d566117e0f..dd5e91d0c7 100644 --- a/plugin/messages.go +++ b/plugin/messages.go @@ -11,11 +11,11 @@ type Message interface { GetTable() *schema.Table } -type MessageCreateTable struct { +type MessageMigrateTable struct { Table *schema.Table } -func (m MessageCreateTable) GetTable() *schema.Table { +func (m MessageMigrateTable) GetTable() *schema.Table { return m.Table } @@ -48,7 +48,7 @@ func (m MessageDeleteStale) GetTable() *schema.Table { type Messages []Message -type CreateTables []*MessageCreateTable +type MigrateTables []*MessageMigrateTable type Inserts []*MessageInsert @@ -63,7 +63,7 @@ func (messages Messages) InsertItems() int64 { return items } -func (m CreateTables) Exists(tableName string) bool { +func (m MigrateTables) Exists(tableName string) bool { for _, table := range m { if table.Table.Name == tableName { return true diff --git a/plugin/plugin_test.go b/plugin/plugin_test.go index c50456139c..b797e81691 100644 --- a/plugin/plugin_test.go +++ b/plugin/plugin_test.go @@ -61,7 +61,7 @@ func TestPluginSuccess(t *testing.T) { t.Fatal(err) } if err := p.WriteAll(ctx, WriteOptions{}, []Message{ - MessageCreateTable{}, + MessageMigrateTable{}, }); err != nil { t.Fatal(err) } diff --git a/plugin/testing_upsert.go b/plugin/testing_upsert.go index 3bacf4474a..268d65a45f 100644 --- a/plugin/testing_upsert.go +++ b/plugin/testing_upsert.go @@ -19,7 +19,7 @@ func (s *WriterTestSuite) testUpsert(ctx context.Context) error { {Name: "name", Type: arrow.BinaryTypes.String, PrimaryKey: true}, }, } - if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageCreateTable{ + if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageMigrateTable{ Table: table, }); err != nil { return fmt.Errorf("failed to create table: %w", err) diff --git a/plugin/testing_write_delete.go b/plugin/testing_write_delete.go index 86606b7286..69e6ec2e92 100644 --- a/plugin/testing_write_delete.go +++ b/plugin/testing_write_delete.go @@ -20,7 +20,7 @@ func (s *WriterTestSuite) testDeleteStale(ctx context.Context) error { schema.CqSyncTimeColumn, }, } - if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageCreateTable{ + if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageMigrateTable{ Table: table, }); err != nil { return fmt.Errorf("failed to create table: %w", err) diff --git a/plugin/testing_write_insert.go b/plugin/testing_write_insert.go index e1b2e08be6..f0cc3e8d83 100644 --- a/plugin/testing_write_insert.go +++ b/plugin/testing_write_insert.go @@ -27,7 +27,7 @@ func (s *WriterTestSuite) testInsert(ctx context.Context) error { {Name: "name", Type: arrow.BinaryTypes.String}, }, } - if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageCreateTable{ + if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageMigrateTable{ Table: table, }); err != nil { return fmt.Errorf("failed to create table: %w", err) @@ -47,7 +47,6 @@ func (s *WriterTestSuite) testInsert(ctx context.Context) error { if err != nil { return fmt.Errorf("failed to sync: %w", err) } - totalItems := TotalRows(readRecords) if totalItems != 1 { diff --git a/plugin/testing_write_migrate.go b/plugin/testing_write_migrate.go index 1cb67f225c..697d4cfda7 100644 --- a/plugin/testing_write_migrate.go +++ b/plugin/testing_write_migrate.go @@ -20,7 +20,7 @@ func tableUUIDSuffix() string { func (s *WriterTestSuite) migrate(ctx context.Context, target *schema.Table, source *schema.Table, supportsSafeMigrate bool, writeOptionMigrateForce bool) error { if err := s.plugin.writeOne(ctx, WriteOptions{ MigrateForce: writeOptionMigrateForce, - }, &MessageCreateTable{ + }, &MessageMigrateTable{ Table: source, }); err != nil { return fmt.Errorf("failed to create table: %w", err) @@ -52,7 +52,7 @@ func (s *WriterTestSuite) migrate(ctx context.Context, target *schema.Table, sou return fmt.Errorf("expected 1 item, got %d", totalItems) } - if err := s.plugin.writeOne(ctx, WriteOptions{MigrateForce: writeOptionMigrateForce}, &MessageCreateTable{ + if err := s.plugin.writeOne(ctx, WriteOptions{MigrateForce: writeOptionMigrateForce}, &MessageMigrateTable{ Table: target, }); err != nil { return fmt.Errorf("failed to create table: %w", err) diff --git a/serve/plugin_test.go b/serve/plugin_test.go index 161193744f..e61555a2fc 100644 --- a/serve/plugin_test.go +++ b/serve/plugin_test.go @@ -115,8 +115,8 @@ func TestPluginServe(t *testing.T) { } if err := writeClient.Send(&pb.Write_Request{ - Message: &pb.Write_Request_CreateTable{ - CreateTable: &pb.MessageCreateTable{ + Message: &pb.Write_Request_MigrateTable{ + MigrateTable: &pb.MessageMigrateTable{ Table: tableBytes, }, }, diff --git a/writers/batch.go b/writers/batch.go index 9ece93efc7..34f15c84f0 100644 --- a/writers/batch.go +++ b/writers/batch.go @@ -27,19 +27,19 @@ const ( ) type BatchWriterClient interface { - CreateTables(context.Context, []*plugin.MessageCreateTable) error + MigrateTables(context.Context, []*plugin.MessageMigrateTable) error WriteTableBatch(ctx context.Context, name string, upsert bool, msgs []*plugin.MessageInsert) error DeleteStale(context.Context, []*plugin.MessageDeleteStale) error } type BatchWriter struct { - client BatchWriterClient - semaphore *semaphore.Weighted - workers map[string]*worker - workersLock *sync.RWMutex - workersWaitGroup *sync.WaitGroup - createTableMessages []*plugin.MessageCreateTable - deleteStaleMessages []*plugin.MessageDeleteStale + client BatchWriterClient + semaphore *semaphore.Weighted + workers map[string]*worker + workersLock *sync.RWMutex + workersWaitGroup *sync.WaitGroup + migrateTableMessages []*plugin.MessageMigrateTable + deleteStaleMessages []*plugin.MessageDeleteStale logger zerolog.Logger batchTimeout time.Duration @@ -101,7 +101,7 @@ func NewBatchWriter(client BatchWriterClient, opts ...Option) (*BatchWriter, err for _, opt := range opts { opt(c) } - c.createTableMessages = make([]*plugin.MessageCreateTable, 0, c.batchSize) + c.migrateTableMessages = make([]*plugin.MessageMigrateTable, 0, c.batchSize) c.deleteStaleMessages = make([]*plugin.MessageDeleteStale, 0, c.batchSize) return c, nil } @@ -114,7 +114,7 @@ func (w *BatchWriter) Flush(ctx context.Context) error { <-done } w.workersLock.RUnlock() - w.flushCreateTables(ctx) + w.flushMigrateTables(ctx) w.flushDeleteStaleTables(ctx) return nil } @@ -217,11 +217,11 @@ func (*BatchWriter) removeDuplicatesByPK(table *schema.Table, resources []arrow. return res } -func (w *BatchWriter) flushCreateTables(ctx context.Context) error { - if err := w.client.CreateTables(ctx, w.createTableMessages); err != nil { +func (w *BatchWriter) flushMigrateTables(ctx context.Context) error { + if err := w.client.MigrateTables(ctx, w.migrateTableMessages); err != nil { return err } - w.createTableMessages = w.createTableMessages[:0] + w.migrateTableMessages = w.migrateTableMessages[:0] return nil } @@ -260,8 +260,8 @@ func (w *BatchWriter) Write(ctx context.Context, msgs <-chan plugin.Message) err for msg := range msgs { switch m := msg.(type) { case *plugin.MessageDeleteStale: - if len(w.createTableMessages) > 0 { - if err := w.flushCreateTables(ctx); err != nil { + if len(w.migrateTableMessages) > 0 { + if err := w.flushMigrateTables(ctx); err != nil { return err } } @@ -273,8 +273,8 @@ func (w *BatchWriter) Write(ctx context.Context, msgs <-chan plugin.Message) err } } case *plugin.MessageInsert: - if len(w.createTableMessages) > 0 { - if err := w.flushCreateTables(ctx); err != nil { + if len(w.migrateTableMessages) > 0 { + if err := w.flushMigrateTables(ctx); err != nil { return err } } @@ -286,16 +286,16 @@ func (w *BatchWriter) Write(ctx context.Context, msgs <-chan plugin.Message) err if err := w.startWorker(ctx, m); err != nil { return err } - case *plugin.MessageCreateTable: + case *plugin.MessageMigrateTable: w.flushInsert(ctx, m.Table.Name) if len(w.deleteStaleMessages) > 0 { if err := w.flushDeleteStaleTables(ctx); err != nil { return err } } - w.createTableMessages = append(w.createTableMessages, m) - if len(w.createTableMessages) > w.batchSize { - if err := w.flushCreateTables(ctx); err != nil { + w.migrateTableMessages = append(w.migrateTableMessages, m) + if len(w.migrateTableMessages) > w.batchSize { + if err := w.flushMigrateTables(ctx); err != nil { return err } } diff --git a/writers/batch_test.go b/writers/batch_test.go index cb51311aeb..deef3bc627 100644 --- a/writers/batch_test.go +++ b/writers/batch_test.go @@ -13,13 +13,13 @@ import ( ) type testBatchClient struct { - createTables []*plugin.MessageCreateTable - inserts []*plugin.MessageInsert - deleteStales []*plugin.MessageDeleteStale + migrateTables []*plugin.MessageMigrateTable + inserts []*plugin.MessageInsert + deleteStales []*plugin.MessageDeleteStale } -func (c *testBatchClient) CreateTables(_ context.Context, msgs []*plugin.MessageCreateTable) error { - c.createTables = append(c.createTables, msgs...) +func (c *testBatchClient) MigrateTables(_ context.Context, msgs []*plugin.MessageMigrateTable) error { + c.migrateTables = append(c.migrateTables, msgs...) return nil } @@ -67,24 +67,24 @@ func TestBatchFlushDifferentMessages(t *testing.T) { bldr := array.NewRecordBuilder(memory.DefaultAllocator, batchTestTables[0].ToArrowSchema()) bldr.Field(0).(*array.Int64Builder).Append(1) record := bldr.NewRecord() - if err := wr.writeAll(ctx, []plugin.Message{&plugin.MessageCreateTable{Table: batchTestTables[0]}}); err != nil { + if err := wr.writeAll(ctx, []plugin.Message{&plugin.MessageMigrateTable{Table: batchTestTables[0]}}); err != nil { t.Fatal(err) } - if len(testClient.createTables) != 0 { - t.Fatalf("expected 0 create table messages, got %d", len(testClient.createTables)) + if len(testClient.migrateTables) != 0 { + t.Fatalf("expected 0 create table messages, got %d", len(testClient.migrateTables)) } if err := wr.writeAll(ctx, []plugin.Message{&plugin.MessageInsert{Record: record}}); err != nil { t.Fatal(err) } - if len(testClient.createTables) != 1 { - t.Fatalf("expected 1 create table messages, got %d", len(testClient.createTables)) + if len(testClient.migrateTables) != 1 { + t.Fatalf("expected 1 create table messages, got %d", len(testClient.migrateTables)) } if len(testClient.inserts) != 0 { t.Fatalf("expected 0 insert messages, got %d", len(testClient.inserts)) } - if err := wr.writeAll(ctx, []plugin.Message{&plugin.MessageCreateTable{Table: batchTestTables[0]}}); err != nil { + if err := wr.writeAll(ctx, []plugin.Message{&plugin.MessageMigrateTable{Table: batchTestTables[0]}}); err != nil { t.Fatal(err) } diff --git a/writers/mixed_batch.go b/writers/mixed_batch.go index ae0ad509d9..998a9d7c13 100644 --- a/writers/mixed_batch.go +++ b/writers/mixed_batch.go @@ -12,16 +12,16 @@ import ( ) const ( - msgTypeCreateTable = iota + msgTypeMigrateTable = iota msgTypeInsert msgTypeDeleteStale ) -var allMsgTypes = []int{msgTypeCreateTable, msgTypeInsert, msgTypeDeleteStale} +var allMsgTypes = []int{msgTypeMigrateTable, msgTypeInsert, msgTypeDeleteStale} // MixedBatchClient is a client that will receive batches of messages with a mixture of tables. type MixedBatchClient interface { - CreateTableBatch(ctx context.Context, messages []*plugin.MessageCreateTable, options plugin.WriteOptions) error + MigrateTableBatch(ctx context.Context, messages []*plugin.MessageMigrateTable, options plugin.WriteOptions) error InsertBatch(ctx context.Context, messages []*plugin.MessageInsert, options plugin.WriteOptions) error DeleteStaleBatch(ctx context.Context, messages []*plugin.MessageDeleteStale, options plugin.WriteOptions) error } @@ -80,8 +80,8 @@ func NewMixedBatchWriter(client MixedBatchClient, opts ...MixedBatchWriterOption func msgID(msg plugin.Message) int { switch msg.(type) { - case plugin.MessageCreateTable, *plugin.MessageCreateTable: - return msgTypeCreateTable + case plugin.MessageMigrateTable, *plugin.MessageMigrateTable: + return msgTypeMigrateTable case plugin.MessageInsert, *plugin.MessageInsert: return msgTypeInsert case plugin.MessageDeleteStale, *plugin.MessageDeleteStale: @@ -92,9 +92,9 @@ func msgID(msg plugin.Message) int { // Write starts listening for messages on the msgChan channel and writes them to the client in batches. func (w *MixedBatchWriter) Write(ctx context.Context, options plugin.WriteOptions, msgChan <-chan plugin.Message) error { - createTable := &batchManager[*plugin.MessageCreateTable]{ - batch: make([]*plugin.MessageCreateTable, 0, w.batchSize), - writeFunc: w.client.CreateTableBatch, + migrateTable := &batchManager[*plugin.MessageMigrateTable]{ + batch: make([]*plugin.MessageMigrateTable, 0, w.batchSize), + writeFunc: w.client.MigrateTableBatch, writeOptions: options, } insert := &insertBatchManager{ @@ -110,8 +110,8 @@ func (w *MixedBatchWriter) Write(ctx context.Context, options plugin.WriteOption } flush := func(msgType int) error { switch msgType { - case msgTypeCreateTable: - return createTable.flush(ctx) + case msgTypeMigrateTable: + return migrateTable.flush(ctx) case msgTypeInsert: return insert.flush(ctx) case msgTypeDeleteStale: @@ -131,8 +131,8 @@ func (w *MixedBatchWriter) Write(ctx context.Context, options plugin.WriteOption } prevMsgType = msgType switch v := msg.(type) { - case *plugin.MessageCreateTable: - err = createTable.append(ctx, v) + case *plugin.MessageMigrateTable: + err = migrateTable.append(ctx, v) case *plugin.MessageInsert: err = insert.append(ctx, v) case *plugin.MessageDeleteStale: diff --git a/writers/mixed_batch_test.go b/writers/mixed_batch_test.go index 610f0bff70..8b952356e2 100644 --- a/writers/mixed_batch_test.go +++ b/writers/mixed_batch_test.go @@ -16,7 +16,7 @@ type testMixedBatchClient struct { receivedBatches [][]plugin.Message } -func (c *testMixedBatchClient) CreateTableBatch(ctx context.Context, msgs []*plugin.MessageCreateTable, options plugin.WriteOptions) error { +func (c *testMixedBatchClient) MigrateTableBatch(ctx context.Context, msgs []*plugin.MessageMigrateTable, options plugin.WriteOptions) error { m := make([]plugin.Message, len(msgs)) for i, msg := range msgs { m[i] = msg @@ -58,7 +58,7 @@ func TestMixedBatchWriter(t *testing.T) { }, }, } - msgCreateTable1 := &plugin.MessageCreateTable{ + msgMigrateTable1 := &plugin.MessageMigrateTable{ Table: table1, } @@ -72,7 +72,7 @@ func TestMixedBatchWriter(t *testing.T) { }, }, } - msgCreateTable2 := &plugin.MessageCreateTable{ + msgMigrateTable2 := &plugin.MessageMigrateTable{ Table: table2, } @@ -113,15 +113,15 @@ func TestMixedBatchWriter(t *testing.T) { { name: "create table, insert, delete stale", messages: []plugin.Message{ - msgCreateTable1, - msgCreateTable2, + msgMigrateTable1, + msgMigrateTable2, msgInsertTable1, msgInsertTable2, msgDeleteStale1, msgDeleteStale2, }, wantBatches: [][]plugin.Message{ - {msgCreateTable1, msgCreateTable2}, + {msgMigrateTable1, msgMigrateTable2}, {msgInsertTable1, msgInsertTable2}, {msgDeleteStale1, msgDeleteStale2}, }, @@ -129,18 +129,18 @@ func TestMixedBatchWriter(t *testing.T) { { name: "interleaved messages", messages: []plugin.Message{ - msgCreateTable1, + msgMigrateTable1, msgInsertTable1, msgDeleteStale1, - msgCreateTable2, + msgMigrateTable2, msgInsertTable2, msgDeleteStale2, }, wantBatches: [][]plugin.Message{ - {msgCreateTable1}, + {msgMigrateTable1}, {msgInsertTable1}, {msgDeleteStale1}, - {msgCreateTable2}, + {msgMigrateTable2}, {msgInsertTable2}, {msgDeleteStale2}, }, @@ -148,15 +148,15 @@ func TestMixedBatchWriter(t *testing.T) { { name: "interleaved messages", messages: []plugin.Message{ - msgCreateTable1, - msgCreateTable2, + msgMigrateTable1, + msgMigrateTable2, msgInsertTable1, msgDeleteStale2, msgInsertTable2, msgDeleteStale1, }, wantBatches: [][]plugin.Message{ - {msgCreateTable1, msgCreateTable2}, + {msgMigrateTable1, msgMigrateTable2}, {msgInsertTable1}, {msgDeleteStale2}, {msgInsertTable2}, From 072a48ee428dfd466b484b96076d62ac99e0c863 Mon Sep 17 00:00:00 2001 From: Herman Schaaf Date: Fri, 16 Jun 2023 16:18:36 +0100 Subject: [PATCH 036/125] Move messages package and many other changes --- internal/memdb/memdb.go | 21 +++++--- .../servers/destination/v0/destinations.go | 15 +++--- .../servers/destination/v1/destinations.go | 15 +++--- internal/servers/plugin/v3/plugin.go | 26 ++++++---- plugin/messages.go => message/message.go | 22 ++++----- plugin/plugin.go | 10 +++- plugin/plugin_destination.go | 16 +++--- plugin/plugin_source.go | 20 ++++---- plugin/plugin_test.go | 15 ++++-- ...testing_sync.go.backup => testing_sync.go} | 37 ++++++++------ plugin/testing_upsert.go | 7 +-- plugin/testing_write.go | 6 +-- plugin/testing_write_delete.go | 7 +-- plugin/testing_write_insert.go | 7 +-- plugin/testing_write_migrate.go | 9 ++-- scheduler/scheduler.go | 12 +++-- scheduler/scheduler_test.go | 25 ++++++---- serve/destination_v0_test.go.backup | 2 +- serve/destination_v1_test.go.backup | 2 +- writers/batch.go | 49 ++++++++++--------- writers/batch_test.go | 30 ++++++------ writers/mixed_batch.go | 41 ++++++++-------- writers/mixed_batch_test.go | 47 +++++++++--------- 23 files changed, 246 insertions(+), 195 deletions(-) rename plugin/messages.go => message/message.go (73%) rename plugin/{testing_sync.go.backup => testing_sync.go} (78%) diff --git a/internal/memdb/memdb.go b/internal/memdb/memdb.go index 00c76b4ae4..4c273ad3a4 100644 --- a/internal/memdb/memdb.go +++ b/internal/memdb/memdb.go @@ -7,6 +7,7 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/array" + "github.com/cloudquery/plugin-sdk/v4/message" "github.com/cloudquery/plugin-sdk/v4/plugin" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/rs/zerolog" @@ -84,6 +85,10 @@ func (c *client) ID() string { return "testDestinationMemDB" } +func (c *client) GetSpec() any { + return &struct{}{} +} + func (c *client) Read(ctx context.Context, table *schema.Table, res chan<- arrow.Record) error { c.memoryDBLock.RLock() defer c.memoryDBLock.RUnlock() @@ -95,15 +100,15 @@ func (c *client) Read(ctx context.Context, table *schema.Table, res chan<- arrow return nil } -func (c *client) Sync(ctx context.Context, options plugin.SyncOptions, res chan<- plugin.Message) error { +func (c *client) Sync(ctx context.Context, options plugin.SyncOptions, res chan<- message.Message) error { c.memoryDBLock.RLock() for tableName := range c.memoryDB { - if !plugin.IsTable(tableName, options.Tables, options.SkipTables) { + if !plugin.MatchesTable(tableName, options.Tables, options.SkipTables) { continue } for _, row := range c.memoryDB[tableName] { - res <- &plugin.MessageInsert{ + res <- &message.Insert{ Record: row, Upsert: false, } @@ -139,7 +144,7 @@ func (c *client) migrate(_ context.Context, table *schema.Table) { c.tables[tableName] = table } -func (c *client) Write(ctx context.Context, options plugin.WriteOptions, msgs <-chan plugin.Message) error { +func (c *client) Write(ctx context.Context, options plugin.WriteOptions, msgs <-chan message.Message) error { if c.errOnWrite { return fmt.Errorf("errOnWrite") } @@ -155,11 +160,11 @@ func (c *client) Write(ctx context.Context, options plugin.WriteOptions, msgs <- c.memoryDBLock.Lock() switch msg := msg.(type) { - case *plugin.MessageMigrateTable: + case *message.MigrateTable: c.migrate(ctx, msg.Table) - case *plugin.MessageDeleteStale: + case *message.DeleteStale: c.deleteStale(ctx, msg) - case *plugin.MessageInsert: + case *message.Insert: sc := msg.Record.Schema() tableName, ok := sc.Metadata().GetValue(schema.MetadataTableName) if !ok { @@ -183,7 +188,7 @@ func (c *client) Close(context.Context) error { return nil } -func (c *client) deleteStale(_ context.Context, msg *plugin.MessageDeleteStale) { +func (c *client) deleteStale(_ context.Context, msg *message.DeleteStale) { var filteredTable []arrow.Record tableName := msg.Table.Name for i, row := range c.memoryDB[tableName] { diff --git a/internal/servers/destination/v0/destinations.go b/internal/servers/destination/v0/destinations.go index 905897889f..af89f7de26 100644 --- a/internal/servers/destination/v0/destinations.go +++ b/internal/servers/destination/v0/destinations.go @@ -12,6 +12,7 @@ import ( pb "github.com/cloudquery/plugin-pb-go/pb/destination/v0" "github.com/cloudquery/plugin-pb-go/specs/v0" schemav2 "github.com/cloudquery/plugin-sdk/v2/schema" + "github.com/cloudquery/plugin-sdk/v4/message" "github.com/cloudquery/plugin-sdk/v4/plugin" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/rs/zerolog" @@ -62,7 +63,7 @@ func (s *Server) Migrate(ctx context.Context, req *pb.Migrate_Request) (*pb.Migr tables := TablesV2ToV3(tablesV2).FlattenTables() SetDestinationManagedCqColumns(tables) s.setPKsForTables(tables) - writeCh := make(chan plugin.Message) + writeCh := make(chan message.Message) eg, ctx := errgroup.WithContext(ctx) eg.Go(func() error { return s.Plugin.Write(ctx, plugin.WriteOptions{ @@ -70,7 +71,7 @@ func (s *Server) Migrate(ctx context.Context, req *pb.Migrate_Request) (*pb.Migr }, writeCh) }) for _, table := range tables { - writeCh <- &plugin.MessageMigrateTable{ + writeCh <- &message.MigrateTable{ Table: table, } } @@ -88,7 +89,7 @@ func (*Server) Write(pb.Destination_WriteServer) error { // Note the order of operations in this method is important! // Trying to insert into the `resources` channel before starting the reader goroutine will cause a deadlock. func (s *Server) Write2(msg pb.Destination_Write2Server) error { - msgs := make(chan plugin.Message) + msgs := make(chan message.Message) r, err := msg.Recv() if err != nil { @@ -125,7 +126,7 @@ func (s *Server) Write2(msg pb.Destination_Write2Server) error { }) for _, table := range tables { - msgs <- &plugin.MessageMigrateTable{ + msgs <- &message.MigrateTable{ Table: table, } } @@ -175,7 +176,7 @@ func (s *Server) Write2(msg pb.Destination_Write2Server) error { origResource.Data = append([]schemav2.CQType{sourceColumn, syncTimeColumn}, origResource.Data...) } convertedResource := CQTypesToRecord(memory.DefaultAllocator, []schemav2.CQTypes{origResource.Data}, table.ToArrowSchema()) - msg := &plugin.MessageInsert{ + msg := &message.Insert{ Record: convertedResource, Upsert: s.spec.WriteMode == specs.WriteModeOverwrite || s.spec.WriteMode == specs.WriteModeOverwriteDeleteStale, } @@ -235,7 +236,7 @@ func (s *Server) DeleteStale(ctx context.Context, req *pb.DeleteStale_Request) ( tables := TablesV2ToV3(tablesV2).FlattenTables() SetDestinationManagedCqColumns(tables) - msgs := make(chan plugin.Message) + msgs := make(chan message.Message) var writeErr error var wg sync.WaitGroup wg.Add(1) @@ -247,7 +248,7 @@ func (s *Server) DeleteStale(ctx context.Context, req *pb.DeleteStale_Request) ( bldr := array.NewRecordBuilder(memory.DefaultAllocator, table.ToArrowSchema()) bldr.Field(table.Columns.Index(schema.CqSourceNameColumn.Name)).(*array.StringBuilder).Append(req.Source) bldr.Field(table.Columns.Index(schema.CqSyncTimeColumn.Name)).(*array.TimestampBuilder).AppendTime(req.Timestamp.AsTime()) - msgs <- &plugin.MessageDeleteStale{ + msgs <- &message.DeleteStale{ Table: table, SourceName: req.Source, SyncTime: req.Timestamp.AsTime(), diff --git a/internal/servers/destination/v1/destinations.go b/internal/servers/destination/v1/destinations.go index f213747ea5..b3534fc56a 100644 --- a/internal/servers/destination/v1/destinations.go +++ b/internal/servers/destination/v1/destinations.go @@ -12,6 +12,7 @@ import ( "github.com/apache/arrow/go/v13/arrow/memory" pb "github.com/cloudquery/plugin-pb-go/pb/destination/v1" "github.com/cloudquery/plugin-pb-go/specs/v0" + "github.com/cloudquery/plugin-sdk/v4/message" "github.com/cloudquery/plugin-sdk/v4/plugin" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/rs/zerolog" @@ -60,7 +61,7 @@ func (s *Server) Migrate(ctx context.Context, req *pb.Migrate_Request) (*pb.Migr } s.setPKsForTables(tables) - writeCh := make(chan plugin.Message) + writeCh := make(chan message.Message) eg, ctx := errgroup.WithContext(ctx) eg.Go(func() error { return s.Plugin.Write(ctx, plugin.WriteOptions{ @@ -68,7 +69,7 @@ func (s *Server) Migrate(ctx context.Context, req *pb.Migrate_Request) (*pb.Migr }, writeCh) }) for _, table := range tables { - writeCh <- &plugin.MessageMigrateTable{ + writeCh <- &message.MigrateTable{ Table: table, } } @@ -82,7 +83,7 @@ func (s *Server) Migrate(ctx context.Context, req *pb.Migrate_Request) (*pb.Migr // Note the order of operations in this method is important! // Trying to insert into the `resources` channel before starting the reader goroutine will cause a deadlock. func (s *Server) Write(msg pb.Destination_WriteServer) error { - msgs := make(chan plugin.Message) + msgs := make(chan message.Message) r, err := msg.Recv() if err != nil { @@ -121,7 +122,7 @@ func (s *Server) Write(msg pb.Destination_WriteServer) error { }) for _, table := range tables { - msgs <- &plugin.MessageMigrateTable{ + msgs <- &message.MigrateTable{ Table: table, } } @@ -153,7 +154,7 @@ func (s *Server) Write(msg pb.Destination_WriteServer) error { for rdr.Next() { rec := rdr.Record() rec.Retain() - msg := &plugin.MessageInsert{ + msg := &message.Insert{ Record: rec, Upsert: s.spec.WriteMode == specs.WriteModeOverwrite || s.spec.WriteMode == specs.WriteModeOverwriteDeleteStale, } @@ -203,7 +204,7 @@ func (s *Server) DeleteStale(ctx context.Context, req *pb.DeleteStale_Request) ( return nil, status.Errorf(codes.InvalidArgument, "failed to create tables: %v", err) } - msgs := make(chan plugin.Message) + msgs := make(chan message.Message) var writeErr error var wg sync.WaitGroup wg.Add(1) @@ -215,7 +216,7 @@ func (s *Server) DeleteStale(ctx context.Context, req *pb.DeleteStale_Request) ( bldr := array.NewRecordBuilder(memory.DefaultAllocator, table.ToArrowSchema()) bldr.Field(table.Columns.Index(schema.CqSourceNameColumn.Name)).(*array.StringBuilder).Append(req.Source) bldr.Field(table.Columns.Index(schema.CqSyncTimeColumn.Name)).(*array.TimestampBuilder).AppendTime(req.Timestamp.AsTime()) - msgs <- &plugin.MessageDeleteStale{ + msgs <- &message.DeleteStale{ Table: table, SourceName: req.Source, SyncTime: req.Timestamp.AsTime(), diff --git a/internal/servers/plugin/v3/plugin.go b/internal/servers/plugin/v3/plugin.go index defbc83be1..2a0b3ce429 100644 --- a/internal/servers/plugin/v3/plugin.go +++ b/internal/servers/plugin/v3/plugin.go @@ -2,6 +2,7 @@ package plugin import ( "context" + "encoding/json" "errors" "fmt" "io" @@ -9,6 +10,7 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/cloudquery/plugin-pb-go/managedplugin" pb "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" + "github.com/cloudquery/plugin-sdk/v4/message" "github.com/cloudquery/plugin-sdk/v4/plugin" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/getsentry/sentry-go" @@ -57,14 +59,18 @@ func (s *Server) GetVersion(context.Context, *pb.GetVersion_Request) (*pb.GetVer } func (s *Server) Init(ctx context.Context, req *pb.Init_Request) (*pb.Init_Response, error) { - if err := s.Plugin.Init(ctx, req.Spec); err != nil { + pluginSpec := s.Plugin.GetSpec() + if err := json.Unmarshal(req.GetSpec(), &pluginSpec); err != nil { + return nil, status.Errorf(codes.InvalidArgument, "failed to unmarshal plugin spec: %v", err) + } + if err := s.Plugin.Init(ctx, pluginSpec); err != nil { return nil, status.Errorf(codes.Internal, "failed to init plugin: %v", err) } return &pb.Init_Response{}, nil } func (s *Server) Sync(req *pb.Sync_Request, stream pb.Plugin_SyncServer) error { - msgs := make(chan plugin.Message) + msgs := make(chan message.Message) var syncErr error ctx := stream.Context() @@ -108,14 +114,14 @@ func (s *Server) Sync(req *pb.Sync_Request, stream pb.Plugin_SyncServer) error { pbMsg := &pb.Sync_Response{} for msg := range msgs { switch m := msg.(type) { - case *plugin.MessageMigrateTable: + case *message.MigrateTable: m.Table.ToArrowSchema() pbMsg.Message = &pb.Sync_Response_MigrateTable{ MigrateTable: &pb.MessageMigrateTable{ Table: nil, }, } - case *plugin.MessageInsert: + case *message.Insert: recordBytes, err := schema.RecordToBytes(m.Record) if err != nil { return status.Errorf(codes.Internal, "failed to encode record: %v", err) @@ -126,7 +132,7 @@ func (s *Server) Sync(req *pb.Sync_Request, stream pb.Plugin_SyncServer) error { Upsert: m.Upsert, }, } - case *plugin.MessageDeleteStale: + case *message.DeleteStale: tableBytes, err := m.Table.ToArrowSchemaBytes() if err != nil { return status.Errorf(codes.Internal, "failed to encode record: %v", err) @@ -160,7 +166,7 @@ func (s *Server) Sync(req *pb.Sync_Request, stream pb.Plugin_SyncServer) error { } func (s *Server) Write(msg pb.Plugin_WriteServer) error { - msgs := make(chan plugin.Message) + msgs := make(chan message.Message) r, err := msg.Recv() if err != nil { return status.Errorf(codes.Internal, "failed to receive msg: %v", err) @@ -192,7 +198,7 @@ func (s *Server) Write(msg pb.Plugin_WriteServer) error { } return status.Errorf(codes.Internal, "failed to receive msg: %v", err) } - var pluginMessage plugin.Message + var pluginMessage message.Message var pbMsgConvertErr error switch pbMsg := r.Message.(type) { case *pb.Write_Request_MigrateTable: @@ -201,7 +207,7 @@ func (s *Server) Write(msg pb.Plugin_WriteServer) error { pbMsgConvertErr = status.Errorf(codes.InvalidArgument, "failed to create table: %v", err) break } - pluginMessage = &plugin.MessageMigrateTable{ + pluginMessage = &message.MigrateTable{ Table: table, } case *pb.Write_Request_Insert: @@ -210,7 +216,7 @@ func (s *Server) Write(msg pb.Plugin_WriteServer) error { pbMsgConvertErr = status.Errorf(codes.InvalidArgument, "failed to create record: %v", err) break } - pluginMessage = &plugin.MessageInsert{ + pluginMessage = &message.Insert{ Record: record, Upsert: pbMsg.Insert.Upsert, } @@ -220,7 +226,7 @@ func (s *Server) Write(msg pb.Plugin_WriteServer) error { pbMsgConvertErr = status.Errorf(codes.InvalidArgument, "failed to create record: %v", err) break } - pluginMessage = &plugin.MessageDeleteStale{ + pluginMessage = &message.DeleteStale{ Table: table, SourceName: pbMsg.Delete.SourceName, SyncTime: pbMsg.Delete.SyncTime.AsTime(), diff --git a/plugin/messages.go b/message/message.go similarity index 73% rename from plugin/messages.go rename to message/message.go index dd5e91d0c7..f30f5a4308 100644 --- a/plugin/messages.go +++ b/message/message.go @@ -1,4 +1,4 @@ -package plugin +package message import ( "time" @@ -11,20 +11,20 @@ type Message interface { GetTable() *schema.Table } -type MessageMigrateTable struct { +type MigrateTable struct { Table *schema.Table } -func (m MessageMigrateTable) GetTable() *schema.Table { +func (m MigrateTable) GetTable() *schema.Table { return m.Table } -type MessageInsert struct { +type Insert struct { Record arrow.Record Upsert bool } -func (m MessageInsert) GetTable() *schema.Table { +func (m Insert) GetTable() *schema.Table { table, err := schema.NewTableFromArrowSchema(m.Record.Schema()) if err != nil { panic(err) @@ -32,31 +32,31 @@ func (m MessageInsert) GetTable() *schema.Table { return table } -// MessageDeleteStale is a pretty specific message which requires the destination to be aware of a CLI use-case +// DeleteStale is a pretty specific message which requires the destination to be aware of a CLI use-case // thus it might be deprecated in the future // in favour of MessageDelete or MessageRawQuery // The message indeciates that the destination needs to run something like "DELETE FROM table WHERE _cq_source_name=$1 and sync_time < $2" -type MessageDeleteStale struct { +type DeleteStale struct { Table *schema.Table SourceName string SyncTime time.Time } -func (m MessageDeleteStale) GetTable() *schema.Table { +func (m DeleteStale) GetTable() *schema.Table { return m.Table } type Messages []Message -type MigrateTables []*MessageMigrateTable +type MigrateTables []*MigrateTable -type Inserts []*MessageInsert +type Inserts []*Insert func (messages Messages) InsertItems() int64 { items := int64(0) for _, msg := range messages { switch m := msg.(type) { - case *MessageInsert: + case *Insert: items += m.Record.NumRows() } } diff --git a/plugin/plugin.go b/plugin/plugin.go index ab79cc633a..d327ee8b87 100644 --- a/plugin/plugin.go +++ b/plugin/plugin.go @@ -6,6 +6,7 @@ import ( "sync" "github.com/apache/arrow/go/v13/arrow" + "github.com/cloudquery/plugin-sdk/v4/message" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/rs/zerolog" ) @@ -21,7 +22,7 @@ type Client interface { type UnimplementedDestination struct{} -func (UnimplementedDestination) Write(ctx context.Context, options WriteOptions, res <-chan Message) error { +func (UnimplementedDestination) Write(ctx context.Context, options WriteOptions, res <-chan message.Message) error { return ErrNotImplemented } @@ -31,7 +32,7 @@ func (UnimplementedDestination) Read(ctx context.Context, table *schema.Table, r type UnimplementedSource struct{} -func (UnimplementedSource) Sync(ctx context.Context, options SyncOptions, res chan<- Message) error { +func (UnimplementedSource) Sync(ctx context.Context, options SyncOptions, res chan<- message.Message) error { return ErrNotImplemented } @@ -119,6 +120,11 @@ func (p *Plugin) Tables(ctx context.Context) (schema.Tables, error) { return tables, nil } +// GetSpec returns an empty struct to be filled with the plugin's configuration. +func (p *Plugin) GetSpec() any { + return p.client.GetSpec() +} + // Init initializes the plugin with the given spec. func (p *Plugin) Init(ctx context.Context, spec any) error { if !p.mu.TryLock() { diff --git a/plugin/plugin_destination.go b/plugin/plugin_destination.go index 123bdf717f..2904b2a9a1 100644 --- a/plugin/plugin_destination.go +++ b/plugin/plugin_destination.go @@ -4,6 +4,7 @@ import ( "context" "github.com/apache/arrow/go/v13/arrow" + "github.com/cloudquery/plugin-sdk/v4/message" "github.com/cloudquery/plugin-sdk/v4/schema" ) @@ -12,20 +13,21 @@ type WriteOptions struct { } type DestinationClient interface { - Read(ctx context.Context, table *schema.Table, res chan<- arrow.Record) error - Write(ctx context.Context, options WriteOptions, res <-chan Message) error + GetSpec() any Close(ctx context.Context) error + Read(ctx context.Context, table *schema.Table, res chan<- arrow.Record) error + Write(ctx context.Context, options WriteOptions, res <-chan message.Message) error } // writeOne is currently used mostly for testing, so it's not a public api -func (p *Plugin) writeOne(ctx context.Context, options WriteOptions, resource Message) error { - resources := []Message{resource} +func (p *Plugin) writeOne(ctx context.Context, options WriteOptions, resource message.Message) error { + resources := []message.Message{resource} return p.WriteAll(ctx, options, resources) } // WriteAll is currently used mostly for testing, so it's not a public api -func (p *Plugin) WriteAll(ctx context.Context, options WriteOptions, resources []Message) error { - ch := make(chan Message, len(resources)) +func (p *Plugin) WriteAll(ctx context.Context, options WriteOptions, resources []message.Message) error { + ch := make(chan message.Message, len(resources)) for _, resource := range resources { ch <- resource } @@ -33,7 +35,7 @@ func (p *Plugin) WriteAll(ctx context.Context, options WriteOptions, resources [ return p.Write(ctx, options, ch) } -func (p *Plugin) Write(ctx context.Context, options WriteOptions, res <-chan Message) error { +func (p *Plugin) Write(ctx context.Context, options WriteOptions, res <-chan message.Message) error { if err := p.client.Write(ctx, options, res); err != nil { return err } diff --git a/plugin/plugin_source.go b/plugin/plugin_source.go index 52d41243c4..c6d6089751 100644 --- a/plugin/plugin_source.go +++ b/plugin/plugin_source.go @@ -6,6 +6,7 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/cloudquery/plugin-sdk/v4/glob" + "github.com/cloudquery/plugin-sdk/v4/message" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/cloudquery/plugin-sdk/v4/state" "github.com/rs/zerolog" @@ -20,12 +21,13 @@ type SyncOptions struct { } type SourceClient interface { - Tables(ctx context.Context) (schema.Tables, error) - Sync(ctx context.Context, options SyncOptions, res chan<- Message) error + GetSpec() any Close(ctx context.Context) error + Tables(ctx context.Context) (schema.Tables, error) + Sync(ctx context.Context, options SyncOptions, res chan<- message.Message) error } -func IsTable(name string, includeTablesPattern []string, skipTablesPattern []string) bool { +func MatchesTable(name string, includeTablesPattern []string, skipTablesPattern []string) bool { for _, pattern := range skipTablesPattern { if glob.Glob(pattern, name) { return false @@ -39,11 +41,11 @@ func IsTable(name string, includeTablesPattern []string, skipTablesPattern []str return false } -type NewReadOnlyClientFunc func(context.Context, zerolog.Logger, any) (SourceClient, error) +type NewSourceClientFunc func(context.Context, zerolog.Logger, any) (SourceClient, error) // NewSourcePlugin returns a new CloudQuery Plugin with the given name, version and implementation. // Source plugins only support read operations. For Read & Write plugin use NewPlugin. -func NewSourcePlugin(name string, version string, newClient NewReadOnlyClientFunc, options ...Option) *Plugin { +func NewSourcePlugin(name string, version string, newClient NewSourceClientFunc, options ...Option) *Plugin { newClientWrapper := func(ctx context.Context, logger zerolog.Logger, any any) (Client, error) { sourceClient, err := newClient(ctx, logger, any) if err != nil { @@ -75,15 +77,15 @@ func (p *Plugin) readAll(ctx context.Context, table *schema.Table) ([]arrow.Reco return records, err } -func (p *Plugin) SyncAll(ctx context.Context, options SyncOptions) (Messages, error) { +func (p *Plugin) SyncAll(ctx context.Context, options SyncOptions) (message.Messages, error) { var err error - ch := make(chan Message) + ch := make(chan message.Message) go func() { defer close(ch) err = p.Sync(ctx, options, ch) }() // nolint:prealloc - var resources []Message + var resources []message.Message for resource := range ch { resources = append(resources, resource) } @@ -91,7 +93,7 @@ func (p *Plugin) SyncAll(ctx context.Context, options SyncOptions) (Messages, er } // Sync is syncing data from the requested tables in spec to the given channel -func (p *Plugin) Sync(ctx context.Context, options SyncOptions, res chan<- Message) error { +func (p *Plugin) Sync(ctx context.Context, options SyncOptions, res chan<- message.Message) error { if !p.mu.TryLock() { return fmt.Errorf("plugin already in use") } diff --git a/plugin/plugin_test.go b/plugin/plugin_test.go index b797e81691..b96d9fc657 100644 --- a/plugin/plugin_test.go +++ b/plugin/plugin_test.go @@ -5,6 +5,7 @@ import ( "testing" "github.com/apache/arrow/go/v13/arrow" + "github.com/cloudquery/plugin-sdk/v4/message" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/rs/zerolog" ) @@ -13,13 +14,17 @@ type testPluginSpec struct { } type testPluginClient struct { - messages []Message + messages []message.Message } func newTestPluginClient(context.Context, zerolog.Logger, any) (Client, error) { return &testPluginClient{}, nil } +func (c *testPluginClient) GetSpec() any { + return &struct{}{} +} + func (c *testPluginClient) Tables(ctx context.Context) (schema.Tables, error) { return schema.Tables{}, nil } @@ -28,13 +33,13 @@ func (c *testPluginClient) Read(ctx context.Context, table *schema.Table, res ch return nil } -func (c *testPluginClient) Sync(ctx context.Context, options SyncOptions, res chan<- Message) error { +func (c *testPluginClient) Sync(ctx context.Context, options SyncOptions, res chan<- message.Message) error { for _, msg := range c.messages { res <- msg } return nil } -func (c *testPluginClient) Write(ctx context.Context, options WriteOptions, res <-chan Message) error { +func (c *testPluginClient) Write(ctx context.Context, options WriteOptions, res <-chan message.Message) error { for msg := range res { c.messages = append(c.messages, msg) } @@ -60,8 +65,8 @@ func TestPluginSuccess(t *testing.T) { if err := p.WriteAll(ctx, WriteOptions{}, nil); err != nil { t.Fatal(err) } - if err := p.WriteAll(ctx, WriteOptions{}, []Message{ - MessageMigrateTable{}, + if err := p.WriteAll(ctx, WriteOptions{}, []message.Message{ + message.MigrateTable{}, }); err != nil { t.Fatal(err) } diff --git a/plugin/testing_sync.go.backup b/plugin/testing_sync.go similarity index 78% rename from plugin/testing_sync.go.backup rename to plugin/testing_sync.go index ecd136ca00..608b7cd653 100644 --- a/plugin/testing_sync.go.backup +++ b/plugin/testing_sync.go @@ -8,10 +8,11 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/array" + "github.com/cloudquery/plugin-sdk/v4/message" "github.com/cloudquery/plugin-sdk/v4/schema" ) -type Validator func(t *testing.T, plugin *Plugin, resources []arrow.Record) +type Validator func(t *testing.T, plugin *Plugin, resources []message.Message) func TestPluginSync(t *testing.T, plugin *Plugin, spec any, options SyncOptions, opts ...TestPluginOption) { t.Helper() @@ -27,7 +28,7 @@ func TestPluginSync(t *testing.T, plugin *Plugin, spec any, options SyncOptions, t.Parallel() } - resourcesChannel := make(chan arrow.Record) + resourcesChannel := make(chan message.Message) var syncErr error if err := plugin.Init(context.Background(), spec); err != nil { @@ -39,7 +40,7 @@ func TestPluginSync(t *testing.T, plugin *Plugin, spec any, options SyncOptions, syncErr = plugin.Sync(context.Background(), options, resourcesChannel) }() - syncedResources := make([]arrow.Record, 0) + syncedResources := make([]message.Message, 0) for resource := range resourcesChannel { syncedResources = append(syncedResources, resource) } @@ -70,28 +71,32 @@ type testPluginOptions struct { validators []Validator } -func getTableResources(t *testing.T, table *schema.Table, resources []arrow.Record) []arrow.Record { +func getTableResources(t *testing.T, table *schema.Table, messages []message.Message) []arrow.Record { t.Helper() tableResources := make([]arrow.Record, 0) - - for _, resource := range resources { - md := resource.Schema().Metadata() - tableName, ok := md.GetValue(schema.MetadataTableName) - if !ok { - t.Errorf("Expected table name to be set in metadata") - } - if tableName == table.Name { - tableResources = append(tableResources, resource) + for _, msg := range messages { + switch v := msg.(type) { + case *message.Insert: + md := v.Record.Schema().Metadata() + tableName, ok := md.GetValue(schema.MetadataTableName) + if !ok { + t.Errorf("Expected table name to be set in metadata") + } + if tableName == table.Name { + tableResources = append(tableResources, v.Record) + } + default: + t.Errorf("Unexpected message type %T", v) } } return tableResources } -func validateTable(t *testing.T, table *schema.Table, resources []arrow.Record) { +func validateTable(t *testing.T, table *schema.Table, messages []message.Message) { t.Helper() - tableResources := getTableResources(t, table, resources) + tableResources := getTableResources(t, table, messages) if len(tableResources) == 0 { t.Errorf("Expected table %s to be synced but it was not found", table.Name) return @@ -99,7 +104,7 @@ func validateTable(t *testing.T, table *schema.Table, resources []arrow.Record) validateResources(t, table, tableResources) } -func validatePlugin(t *testing.T, plugin *Plugin, resources []arrow.Record) { +func validatePlugin(t *testing.T, plugin *Plugin, resources []message.Message) { t.Helper() tables, err := plugin.Tables(context.Background()) if err != nil { diff --git a/plugin/testing_upsert.go b/plugin/testing_upsert.go index 268d65a45f..a32c7d6497 100644 --- a/plugin/testing_upsert.go +++ b/plugin/testing_upsert.go @@ -8,6 +8,7 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/array" "github.com/apache/arrow/go/v13/arrow/memory" + "github.com/cloudquery/plugin-sdk/v4/message" "github.com/cloudquery/plugin-sdk/v4/schema" ) @@ -19,7 +20,7 @@ func (s *WriterTestSuite) testUpsert(ctx context.Context) error { {Name: "name", Type: arrow.BinaryTypes.String, PrimaryKey: true}, }, } - if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageMigrateTable{ + if err := s.plugin.writeOne(ctx, WriteOptions{}, &message.MigrateTable{ Table: table, }); err != nil { return fmt.Errorf("failed to create table: %w", err) @@ -29,7 +30,7 @@ func (s *WriterTestSuite) testUpsert(ctx context.Context) error { bldr.Field(0).(*array.StringBuilder).Append("foo") record := bldr.NewRecord() - if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageInsert{ + if err := s.plugin.writeOne(ctx, WriteOptions{}, &message.Insert{ Record: record, Upsert: true, }); err != nil { @@ -45,7 +46,7 @@ func (s *WriterTestSuite) testUpsert(ctx context.Context) error { return fmt.Errorf("expected 1 item, got %d", totalItems) } - if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageInsert{ + if err := s.plugin.writeOne(ctx, WriteOptions{}, &message.Insert{ Record: record, Upsert: true, }); err != nil { diff --git a/plugin/testing_write.go b/plugin/testing_write.go index 2dd9c4b074..e3a6eddc39 100644 --- a/plugin/testing_write.go +++ b/plugin/testing_write.go @@ -37,14 +37,14 @@ type SafeMigrations struct { } type PluginTestSuiteTests struct { - // SkipUpsert skips testing with MessageInsert and Upsert=true. + // SkipUpsert skips testing with message.Insert and Upsert=true. // Usually when a destination is not supporting primary keys SkipUpsert bool - // SkipDeleteStale skips testing MessageDelete events. + // SkipDeleteStale skips testing message.Delete events. SkipDeleteStale bool - // SkipAppend skips testing MessageInsert and Upsert=false. + // SkipAppend skips testing message.Insert and Upsert=false. SkipInsert bool // SkipMigrate skips testing migration diff --git a/plugin/testing_write_delete.go b/plugin/testing_write_delete.go index 69e6ec2e92..5ec89b8d93 100644 --- a/plugin/testing_write_delete.go +++ b/plugin/testing_write_delete.go @@ -7,6 +7,7 @@ import ( "github.com/apache/arrow/go/v13/arrow/array" "github.com/apache/arrow/go/v13/arrow/memory" + "github.com/cloudquery/plugin-sdk/v4/message" "github.com/cloudquery/plugin-sdk/v4/schema" ) @@ -20,7 +21,7 @@ func (s *WriterTestSuite) testDeleteStale(ctx context.Context) error { schema.CqSyncTimeColumn, }, } - if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageMigrateTable{ + if err := s.plugin.writeOne(ctx, WriteOptions{}, &message.MigrateTable{ Table: table, }); err != nil { return fmt.Errorf("failed to create table: %w", err) @@ -31,7 +32,7 @@ func (s *WriterTestSuite) testDeleteStale(ctx context.Context) error { bldr.Field(1).(*array.TimestampBuilder).AppendTime(syncTime) record := bldr.NewRecord() - if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageInsert{ + if err := s.plugin.writeOne(ctx, WriteOptions{}, &message.Insert{ Record: record, }); err != nil { return fmt.Errorf("failed to insert record: %w", err) @@ -51,7 +52,7 @@ func (s *WriterTestSuite) testDeleteStale(ctx context.Context) error { bldr.Field(0).(*array.StringBuilder).Append("test") bldr.Field(1).(*array.TimestampBuilder).AppendTime(syncTime.Add(time.Second)) - if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageDeleteStale{ + if err := s.plugin.writeOne(ctx, WriteOptions{}, &message.DeleteStale{ Table: table, SourceName: "test", SyncTime: syncTime, diff --git a/plugin/testing_write_insert.go b/plugin/testing_write_insert.go index f0cc3e8d83..7dc987a94b 100644 --- a/plugin/testing_write_insert.go +++ b/plugin/testing_write_insert.go @@ -8,6 +8,7 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/array" "github.com/apache/arrow/go/v13/arrow/memory" + "github.com/cloudquery/plugin-sdk/v4/message" "github.com/cloudquery/plugin-sdk/v4/schema" ) @@ -27,7 +28,7 @@ func (s *WriterTestSuite) testInsert(ctx context.Context) error { {Name: "name", Type: arrow.BinaryTypes.String}, }, } - if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageMigrateTable{ + if err := s.plugin.writeOne(ctx, WriteOptions{}, &message.MigrateTable{ Table: table, }); err != nil { return fmt.Errorf("failed to create table: %w", err) @@ -37,7 +38,7 @@ func (s *WriterTestSuite) testInsert(ctx context.Context) error { bldr.Field(0).(*array.StringBuilder).Append("foo") record := bldr.NewRecord() - if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageInsert{ + if err := s.plugin.writeOne(ctx, WriteOptions{}, &message.Insert{ Record: record, Upsert: false, }); err != nil { @@ -53,7 +54,7 @@ func (s *WriterTestSuite) testInsert(ctx context.Context) error { return fmt.Errorf("expected 1 item, got %d", totalItems) } - if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageInsert{ + if err := s.plugin.writeOne(ctx, WriteOptions{}, &message.Insert{ Record: record, }); err != nil { return fmt.Errorf("failed to insert record: %w", err) diff --git a/plugin/testing_write_migrate.go b/plugin/testing_write_migrate.go index 697d4cfda7..7418f2b043 100644 --- a/plugin/testing_write_migrate.go +++ b/plugin/testing_write_migrate.go @@ -8,6 +8,7 @@ import ( "time" "github.com/apache/arrow/go/v13/arrow" + "github.com/cloudquery/plugin-sdk/v4/message" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/cloudquery/plugin-sdk/v4/types" "github.com/google/uuid" @@ -20,7 +21,7 @@ func tableUUIDSuffix() string { func (s *WriterTestSuite) migrate(ctx context.Context, target *schema.Table, source *schema.Table, supportsSafeMigrate bool, writeOptionMigrateForce bool) error { if err := s.plugin.writeOne(ctx, WriteOptions{ MigrateForce: writeOptionMigrateForce, - }, &MessageMigrateTable{ + }, &message.MigrateTable{ Table: source, }); err != nil { return fmt.Errorf("failed to create table: %w", err) @@ -37,7 +38,7 @@ func (s *WriterTestSuite) migrate(ctx context.Context, target *schema.Table, sou resource1 := schema.GenTestData(source, opts)[0] - if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageInsert{ + if err := s.plugin.writeOne(ctx, WriteOptions{}, &message.Insert{ Record: resource1, }); err != nil { return fmt.Errorf("failed to insert first record: %w", err) @@ -52,14 +53,14 @@ func (s *WriterTestSuite) migrate(ctx context.Context, target *schema.Table, sou return fmt.Errorf("expected 1 item, got %d", totalItems) } - if err := s.plugin.writeOne(ctx, WriteOptions{MigrateForce: writeOptionMigrateForce}, &MessageMigrateTable{ + if err := s.plugin.writeOne(ctx, WriteOptions{MigrateForce: writeOptionMigrateForce}, &message.MigrateTable{ Table: target, }); err != nil { return fmt.Errorf("failed to create table: %w", err) } resource2 := schema.GenTestData(target, opts)[0] - if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageInsert{ + if err := s.plugin.writeOne(ctx, WriteOptions{}, &message.Insert{ Record: resource2, }); err != nil { return fmt.Errorf("failed to insert second record: %w", err) diff --git a/scheduler/scheduler.go b/scheduler/scheduler.go index 66f56845d7..9d53abbb15 100644 --- a/scheduler/scheduler.go +++ b/scheduler/scheduler.go @@ -9,10 +9,10 @@ import ( "sync/atomic" "time" - "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/array" "github.com/apache/arrow/go/v13/arrow/memory" "github.com/cloudquery/plugin-sdk/v4/caser" + "github.com/cloudquery/plugin-sdk/v4/message" "github.com/cloudquery/plugin-sdk/v4/scalar" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/getsentry/sentry-go" @@ -83,6 +83,10 @@ func WithSchedulerStrategy(strategy Strategy) Option { } } +type Client interface { + ID() string +} + type Scheduler struct { tables schema.Tables client schema.ClientMeta @@ -119,7 +123,7 @@ func NewScheduler(tables schema.Tables, client schema.ClientMeta, opts ...Option return &s } -func (s *Scheduler) Sync(ctx context.Context, res chan<- arrow.Record) error { +func (s *Scheduler) Sync(ctx context.Context, res chan<- message.Message) error { resources := make(chan *schema.Resource) go func() { defer close(resources) @@ -137,12 +141,12 @@ func (s *Scheduler) Sync(ctx context.Context, res chan<- arrow.Record) error { bldr := array.NewRecordBuilder(memory.DefaultAllocator, resource.Table.ToArrowSchema()) scalar.AppendToRecordBuilder(bldr, vector) rec := bldr.NewRecord() - res <- rec + res <- &message.Insert{Record: rec} } return nil } -func (s *Scheduler) logTablesMetrics(tables schema.Tables, client schema.ClientMeta) { +func (s *Scheduler) logTablesMetrics(tables schema.Tables, client Client) { clientName := client.ID() for _, table := range tables { metrics := s.metrics.TableClient[table.Name][clientName] diff --git a/scheduler/scheduler_test.go b/scheduler/scheduler_test.go index 6eb6f3db01..fa5aa9669f 100644 --- a/scheduler/scheduler_test.go +++ b/scheduler/scheduler_test.go @@ -6,6 +6,7 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/array" + "github.com/cloudquery/plugin-sdk/v4/message" "github.com/cloudquery/plugin-sdk/v4/scalar" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/rs/zerolog" @@ -229,25 +230,31 @@ func testSyncTable(t *testing.T, tc syncTestCase, strategy Strategy, determinist WithDeterministicCQId(deterministicCQID), } sc := NewScheduler(tables, &c, opts...) - records := make(chan arrow.Record, 10) - if err := sc.Sync(ctx, records); err != nil { + msgs := make(chan message.Message, 10) + if err := sc.Sync(ctx, msgs); err != nil { t.Fatal(err) } - close(records) + close(msgs) var i int - for record := range records { + for msg := range msgs { if tc.data == nil { - t.Fatalf("Unexpected resource %v", record) + t.Fatalf("Unexpected message %v", msg) } if i >= len(tc.data) { t.Fatalf("expected %d resources. got %d", len(tc.data), i) } - rec := tc.data[i].ToArrowRecord(record.Schema()) - if !array.RecordEqual(rec, record) { - t.Fatalf("expected at i=%d: %v. got %v", i, tc.data[i], record) + switch v := msg.(type) { + case *message.Insert: + record := v.Record + rec := tc.data[i].ToArrowRecord(record.Schema()) + if !array.RecordEqual(rec, record) { + t.Fatalf("expected at i=%d: %v. got %v", i, tc.data[i], record) + } + i++ + default: + t.Fatalf("expected insert message. got %v", msg) } - i++ } if len(tc.data) != i { t.Fatalf("expected %d resources. got %d", len(tc.data), i) diff --git a/serve/destination_v0_test.go.backup b/serve/destination_v0_test.go.backup index ff0ad377d3..6c2ca95965 100644 --- a/serve/destination_v0_test.go.backup +++ b/serve/destination_v0_test.go.backup @@ -139,7 +139,7 @@ func TestDestination(t *testing.T) { destRecord := serversDestination.CQTypesOneToRecord(memory.DefaultAllocator, destResource.Data, table.ToArrowSchema()) for _, msg := range msgs { totalResources++ - m := msg.(*plugin.MessageInsert) + m := msg.(*message.Insert) if !array.RecordEqual(destRecord, m.Record) { // diff := destination.RecordDiff(destRecord, resource) t.Fatalf("expected %v but got %v", destRecord, m.Record) diff --git a/serve/destination_v1_test.go.backup b/serve/destination_v1_test.go.backup index abc789ff2d..d12aea4db1 100644 --- a/serve/destination_v1_test.go.backup +++ b/serve/destination_v1_test.go.backup @@ -138,7 +138,7 @@ func TestDestinationV1(t *testing.T) { totalResources := 0 for _, msg := range msgs { totalResources++ - m := msg.(*plugin.MessageInsert) + m := msg.(*message.Insert) if !array.RecordEqual(rec, m.Record) { // diff := plugin.RecordDiff(rec, resource) // t.Fatalf("diff at %d: %s", totalResources, diff) diff --git a/writers/batch.go b/writers/batch.go index 34f15c84f0..26661d864f 100644 --- a/writers/batch.go +++ b/writers/batch.go @@ -9,6 +9,7 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/util" "github.com/cloudquery/plugin-sdk/v4/internal/pk" + "github.com/cloudquery/plugin-sdk/v4/message" "github.com/cloudquery/plugin-sdk/v4/plugin" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/rs/zerolog" @@ -16,7 +17,7 @@ import ( ) type Writer interface { - Write(ctx context.Context, writeOptions plugin.WriteOptions, res <-chan plugin.Message) error + Write(ctx context.Context, writeOptions plugin.WriteOptions, res <-chan message.Message) error } const ( @@ -27,9 +28,9 @@ const ( ) type BatchWriterClient interface { - MigrateTables(context.Context, []*plugin.MessageMigrateTable) error - WriteTableBatch(ctx context.Context, name string, upsert bool, msgs []*plugin.MessageInsert) error - DeleteStale(context.Context, []*plugin.MessageDeleteStale) error + MigrateTables(context.Context, []*message.MigrateTable) error + WriteTableBatch(ctx context.Context, name string, upsert bool, msgs []*message.Insert) error + DeleteStale(context.Context, []*message.DeleteStale) error } type BatchWriter struct { @@ -38,8 +39,8 @@ type BatchWriter struct { workers map[string]*worker workersLock *sync.RWMutex workersWaitGroup *sync.WaitGroup - migrateTableMessages []*plugin.MessageMigrateTable - deleteStaleMessages []*plugin.MessageDeleteStale + migrateTableMessages []*message.MigrateTable + deleteStaleMessages []*message.DeleteStale logger zerolog.Logger batchTimeout time.Duration @@ -82,7 +83,7 @@ func WithBatchSizeBytes(size int) Option { type worker struct { count int wg *sync.WaitGroup - ch chan *plugin.MessageInsert + ch chan *message.Insert flush chan chan bool } @@ -101,8 +102,8 @@ func NewBatchWriter(client BatchWriterClient, opts ...Option) (*BatchWriter, err for _, opt := range opts { opt(c) } - c.migrateTableMessages = make([]*plugin.MessageMigrateTable, 0, c.batchSize) - c.deleteStaleMessages = make([]*plugin.MessageDeleteStale, 0, c.batchSize) + c.migrateTableMessages = make([]*message.MigrateTable, 0, c.batchSize) + c.deleteStaleMessages = make([]*message.DeleteStale, 0, c.batchSize) return c, nil } @@ -130,9 +131,9 @@ func (w *BatchWriter) Close(ctx context.Context) error { return nil } -func (w *BatchWriter) worker(ctx context.Context, tableName string, ch <-chan *plugin.MessageInsert, flush <-chan chan bool) { +func (w *BatchWriter) worker(ctx context.Context, tableName string, ch <-chan *message.Insert, flush <-chan chan bool) { sizeBytes := int64(0) - resources := make([]*plugin.MessageInsert, 0) + resources := make([]*message.Insert, 0) upsertBatch := false for { select { @@ -145,7 +146,7 @@ func (w *BatchWriter) worker(ctx context.Context, tableName string, ch <-chan *p } if upsertBatch != r.Upsert { w.flush(ctx, tableName, upsertBatch, resources) - resources = make([]*plugin.MessageInsert, 0) + resources = make([]*message.Insert, 0) sizeBytes = 0 upsertBatch = r.Upsert resources = append(resources, r) @@ -156,19 +157,19 @@ func (w *BatchWriter) worker(ctx context.Context, tableName string, ch <-chan *p } if len(resources) >= w.batchSize || sizeBytes+util.TotalRecordSize(r.Record) >= int64(w.batchSizeBytes) { w.flush(ctx, tableName, upsertBatch, resources) - resources = make([]*plugin.MessageInsert, 0) + resources = make([]*message.Insert, 0) sizeBytes = 0 } case <-time.After(w.batchTimeout): if len(resources) > 0 { w.flush(ctx, tableName, upsertBatch, resources) - resources = make([]*plugin.MessageInsert, 0) + resources = make([]*message.Insert, 0) sizeBytes = 0 } case done := <-flush: if len(resources) > 0 { w.flush(ctx, tableName, upsertBatch, resources) - resources = make([]*plugin.MessageInsert, 0) + resources = make([]*message.Insert, 0) sizeBytes = 0 } done <- true @@ -179,7 +180,7 @@ func (w *BatchWriter) worker(ctx context.Context, tableName string, ch <-chan *p } } -func (w *BatchWriter) flush(ctx context.Context, tableName string, upsertBatch bool, resources []*plugin.MessageInsert) { +func (w *BatchWriter) flush(ctx context.Context, tableName string, upsertBatch bool, resources []*message.Insert) { // resources = w.removeDuplicatesByPK(table, resources) start := time.Now() batchSize := len(resources) @@ -247,8 +248,8 @@ func (w *BatchWriter) flushInsert(ctx context.Context, tableName string) { <-ch } -func (w *BatchWriter) writeAll(ctx context.Context, msgs []plugin.Message) error { - ch := make(chan plugin.Message, len(msgs)) +func (w *BatchWriter) writeAll(ctx context.Context, msgs []message.Message) error { + ch := make(chan message.Message, len(msgs)) for _, msg := range msgs { ch <- msg } @@ -256,10 +257,10 @@ func (w *BatchWriter) writeAll(ctx context.Context, msgs []plugin.Message) error return w.Write(ctx, ch) } -func (w *BatchWriter) Write(ctx context.Context, msgs <-chan plugin.Message) error { +func (w *BatchWriter) Write(ctx context.Context, msgs <-chan message.Message) error { for msg := range msgs { switch m := msg.(type) { - case *plugin.MessageDeleteStale: + case *message.DeleteStale: if len(w.migrateTableMessages) > 0 { if err := w.flushMigrateTables(ctx); err != nil { return err @@ -272,7 +273,7 @@ func (w *BatchWriter) Write(ctx context.Context, msgs <-chan plugin.Message) err return err } } - case *plugin.MessageInsert: + case *message.Insert: if len(w.migrateTableMessages) > 0 { if err := w.flushMigrateTables(ctx); err != nil { return err @@ -286,7 +287,7 @@ func (w *BatchWriter) Write(ctx context.Context, msgs <-chan plugin.Message) err if err := w.startWorker(ctx, m); err != nil { return err } - case *plugin.MessageMigrateTable: + case *message.MigrateTable: w.flushInsert(ctx, m.Table.Name) if len(w.deleteStaleMessages) > 0 { if err := w.flushDeleteStaleTables(ctx); err != nil { @@ -304,7 +305,7 @@ func (w *BatchWriter) Write(ctx context.Context, msgs <-chan plugin.Message) err return nil } -func (w *BatchWriter) startWorker(ctx context.Context, msg *plugin.MessageInsert) error { +func (w *BatchWriter) startWorker(ctx context.Context, msg *message.Insert) error { w.workersLock.RLock() md := msg.Record.Schema().Metadata() tableName, ok := md.GetValue(schema.MetadataTableName) @@ -319,7 +320,7 @@ func (w *BatchWriter) startWorker(ctx context.Context, msg *plugin.MessageInsert return nil } w.workersLock.Lock() - ch := make(chan *plugin.MessageInsert) + ch := make(chan *message.Insert) flush := make(chan chan bool) wr = &worker{ count: 1, diff --git a/writers/batch_test.go b/writers/batch_test.go index deef3bc627..35452ca7ee 100644 --- a/writers/batch_test.go +++ b/writers/batch_test.go @@ -8,26 +8,26 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/array" "github.com/apache/arrow/go/v13/arrow/memory" - "github.com/cloudquery/plugin-sdk/v4/plugin" + "github.com/cloudquery/plugin-sdk/v4/message" "github.com/cloudquery/plugin-sdk/v4/schema" ) type testBatchClient struct { - migrateTables []*plugin.MessageMigrateTable - inserts []*plugin.MessageInsert - deleteStales []*plugin.MessageDeleteStale + migrateTables []*message.MigrateTable + inserts []*message.Insert + deleteStales []*message.DeleteStale } -func (c *testBatchClient) MigrateTables(_ context.Context, msgs []*plugin.MessageMigrateTable) error { +func (c *testBatchClient) MigrateTables(_ context.Context, msgs []*message.MigrateTable) error { c.migrateTables = append(c.migrateTables, msgs...) return nil } -func (c *testBatchClient) WriteTableBatch(_ context.Context, _ string, _ bool, msgs []*plugin.MessageInsert) error { +func (c *testBatchClient) WriteTableBatch(_ context.Context, _ string, _ bool, msgs []*message.Insert) error { c.inserts = append(c.inserts, msgs...) return nil } -func (c *testBatchClient) DeleteStale(_ context.Context, msgs []*plugin.MessageDeleteStale) error { +func (c *testBatchClient) DeleteStale(_ context.Context, msgs []*message.DeleteStale) error { c.deleteStales = append(c.deleteStales, msgs...) return nil } @@ -67,13 +67,13 @@ func TestBatchFlushDifferentMessages(t *testing.T) { bldr := array.NewRecordBuilder(memory.DefaultAllocator, batchTestTables[0].ToArrowSchema()) bldr.Field(0).(*array.Int64Builder).Append(1) record := bldr.NewRecord() - if err := wr.writeAll(ctx, []plugin.Message{&plugin.MessageMigrateTable{Table: batchTestTables[0]}}); err != nil { + if err := wr.writeAll(ctx, []message.Message{&message.MigrateTable{Table: batchTestTables[0]}}); err != nil { t.Fatal(err) } if len(testClient.migrateTables) != 0 { t.Fatalf("expected 0 create table messages, got %d", len(testClient.migrateTables)) } - if err := wr.writeAll(ctx, []plugin.Message{&plugin.MessageInsert{Record: record}}); err != nil { + if err := wr.writeAll(ctx, []message.Message{&message.Insert{Record: record}}); err != nil { t.Fatal(err) } if len(testClient.migrateTables) != 1 { @@ -84,7 +84,7 @@ func TestBatchFlushDifferentMessages(t *testing.T) { t.Fatalf("expected 0 insert messages, got %d", len(testClient.inserts)) } - if err := wr.writeAll(ctx, []plugin.Message{&plugin.MessageMigrateTable{Table: batchTestTables[0]}}); err != nil { + if err := wr.writeAll(ctx, []message.Message{&message.MigrateTable{Table: batchTestTables[0]}}); err != nil { t.Fatal(err) } @@ -103,7 +103,7 @@ func TestBatchSize(t *testing.T) { } table := schema.Table{Name: "table1", Columns: []schema.Column{{Name: "id", Type: arrow.PrimitiveTypes.Int64}}} record := array.NewRecord(table.ToArrowSchema(), nil, 0) - if err := wr.writeAll(ctx, []plugin.Message{&plugin.MessageInsert{ + if err := wr.writeAll(ctx, []message.Message{&message.Insert{ Record: record, }}); err != nil { t.Fatal(err) @@ -113,7 +113,7 @@ func TestBatchSize(t *testing.T) { t.Fatalf("expected 0 create table messages, got %d", len(testClient.inserts)) } - if err := wr.writeAll(ctx, []plugin.Message{&plugin.MessageInsert{ + if err := wr.writeAll(ctx, []message.Message{&message.Insert{ Record: record, }}); err != nil { t.Fatal(err) @@ -136,7 +136,7 @@ func TestBatchTimeout(t *testing.T) { } table := schema.Table{Name: "table1", Columns: []schema.Column{{Name: "id", Type: arrow.PrimitiveTypes.Int64}}} record := array.NewRecord(table.ToArrowSchema(), nil, 0) - if err := wr.writeAll(ctx, []plugin.Message{&plugin.MessageInsert{ + if err := wr.writeAll(ctx, []message.Message{&message.Insert{ Record: record, }}); err != nil { t.Fatal(err) @@ -171,7 +171,7 @@ func TestBatchUpserts(t *testing.T) { } table := schema.Table{Name: "table1", Columns: []schema.Column{{Name: "id", Type: arrow.PrimitiveTypes.Int64}}} record := array.NewRecord(table.ToArrowSchema(), nil, 0) - if err := wr.writeAll(ctx, []plugin.Message{&plugin.MessageInsert{ + if err := wr.writeAll(ctx, []message.Message{&message.Insert{ Record: record, Upsert: true, }}); err != nil { @@ -182,7 +182,7 @@ func TestBatchUpserts(t *testing.T) { t.Fatalf("expected 0 create table messages, got %d", len(testClient.inserts)) } - if err := wr.writeAll(ctx, []plugin.Message{&plugin.MessageInsert{ + if err := wr.writeAll(ctx, []message.Message{&message.Insert{ Record: record, }}); err != nil { t.Fatal(err) diff --git a/writers/mixed_batch.go b/writers/mixed_batch.go index 998a9d7c13..f6704cf488 100644 --- a/writers/mixed_batch.go +++ b/writers/mixed_batch.go @@ -6,6 +6,7 @@ import ( "time" "github.com/apache/arrow/go/v13/arrow/util" + "github.com/cloudquery/plugin-sdk/v4/message" "github.com/cloudquery/plugin-sdk/v4/plugin" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/rs/zerolog" @@ -21,9 +22,9 @@ var allMsgTypes = []int{msgTypeMigrateTable, msgTypeInsert, msgTypeDeleteStale} // MixedBatchClient is a client that will receive batches of messages with a mixture of tables. type MixedBatchClient interface { - MigrateTableBatch(ctx context.Context, messages []*plugin.MessageMigrateTable, options plugin.WriteOptions) error - InsertBatch(ctx context.Context, messages []*plugin.MessageInsert, options plugin.WriteOptions) error - DeleteStaleBatch(ctx context.Context, messages []*plugin.MessageDeleteStale, options plugin.WriteOptions) error + MigrateTableBatch(ctx context.Context, messages []*message.MigrateTable, options plugin.WriteOptions) error + InsertBatch(ctx context.Context, messages []*message.Insert, options plugin.WriteOptions) error + DeleteStaleBatch(ctx context.Context, messages []*message.DeleteStale, options plugin.WriteOptions) error } type MixedBatchWriter struct { @@ -78,33 +79,33 @@ func NewMixedBatchWriter(client MixedBatchClient, opts ...MixedBatchWriterOption return c, nil } -func msgID(msg plugin.Message) int { +func msgID(msg message.Message) int { switch msg.(type) { - case plugin.MessageMigrateTable, *plugin.MessageMigrateTable: + case message.MigrateTable, *message.MigrateTable: return msgTypeMigrateTable - case plugin.MessageInsert, *plugin.MessageInsert: + case message.Insert, *message.Insert: return msgTypeInsert - case plugin.MessageDeleteStale, *plugin.MessageDeleteStale: + case message.DeleteStale, *message.DeleteStale: return msgTypeDeleteStale } panic("unknown message type: " + reflect.TypeOf(msg).Name()) } // Write starts listening for messages on the msgChan channel and writes them to the client in batches. -func (w *MixedBatchWriter) Write(ctx context.Context, options plugin.WriteOptions, msgChan <-chan plugin.Message) error { - migrateTable := &batchManager[*plugin.MessageMigrateTable]{ - batch: make([]*plugin.MessageMigrateTable, 0, w.batchSize), +func (w *MixedBatchWriter) Write(ctx context.Context, options plugin.WriteOptions, msgChan <-chan message.Message) error { + migrateTable := &batchManager[*message.MigrateTable]{ + batch: make([]*message.MigrateTable, 0, w.batchSize), writeFunc: w.client.MigrateTableBatch, writeOptions: options, } insert := &insertBatchManager{ - batch: make([]*plugin.MessageInsert, 0, w.batchSize), + batch: make([]*message.Insert, 0, w.batchSize), writeFunc: w.client.InsertBatch, maxBatchSizeBytes: int64(w.batchSizeBytes), writeOptions: options, } - deleteStale := &batchManager[*plugin.MessageDeleteStale]{ - batch: make([]*plugin.MessageDeleteStale, 0, w.batchSize), + deleteStale := &batchManager[*message.DeleteStale]{ + batch: make([]*message.DeleteStale, 0, w.batchSize), writeFunc: w.client.DeleteStaleBatch, writeOptions: options, } @@ -131,11 +132,11 @@ func (w *MixedBatchWriter) Write(ctx context.Context, options plugin.WriteOption } prevMsgType = msgType switch v := msg.(type) { - case *plugin.MessageMigrateTable: + case *message.MigrateTable: err = migrateTable.append(ctx, v) - case *plugin.MessageInsert: + case *message.Insert: err = insert.append(ctx, v) - case *plugin.MessageDeleteStale: + case *message.DeleteStale: err = deleteStale.append(ctx, v) default: panic("unknown message type") @@ -151,7 +152,7 @@ func (w *MixedBatchWriter) Write(ctx context.Context, options plugin.WriteOption } // generic batch manager for most message types -type batchManager[T plugin.Message] struct { +type batchManager[T message.Message] struct { batch []T writeFunc func(ctx context.Context, messages []T, options plugin.WriteOptions) error writeOptions plugin.WriteOptions @@ -182,14 +183,14 @@ func (m *batchManager[T]) flush(ctx context.Context) error { // special batch manager for insert messages that also keeps track of the total size of the batch type insertBatchManager struct { - batch []*plugin.MessageInsert - writeFunc func(ctx context.Context, messages []*plugin.MessageInsert, writeOptions plugin.WriteOptions) error + batch []*message.Insert + writeFunc func(ctx context.Context, messages []*message.Insert, writeOptions plugin.WriteOptions) error curBatchSizeBytes int64 maxBatchSizeBytes int64 writeOptions plugin.WriteOptions } -func (m *insertBatchManager) append(ctx context.Context, msg *plugin.MessageInsert) error { +func (m *insertBatchManager) append(ctx context.Context, msg *message.Insert) error { if len(m.batch) == cap(m.batch) || m.curBatchSizeBytes+util.TotalRecordSize(msg.Record) > m.maxBatchSizeBytes { if err := m.flush(ctx); err != nil { return err diff --git a/writers/mixed_batch_test.go b/writers/mixed_batch_test.go index 8b952356e2..1ad37cc02f 100644 --- a/writers/mixed_batch_test.go +++ b/writers/mixed_batch_test.go @@ -8,16 +8,17 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/array" "github.com/apache/arrow/go/v13/arrow/memory" + "github.com/cloudquery/plugin-sdk/v4/message" "github.com/cloudquery/plugin-sdk/v4/plugin" "github.com/cloudquery/plugin-sdk/v4/schema" ) type testMixedBatchClient struct { - receivedBatches [][]plugin.Message + receivedBatches [][]message.Message } -func (c *testMixedBatchClient) MigrateTableBatch(ctx context.Context, msgs []*plugin.MessageMigrateTable, options plugin.WriteOptions) error { - m := make([]plugin.Message, len(msgs)) +func (c *testMixedBatchClient) MigrateTableBatch(ctx context.Context, msgs []*message.MigrateTable, options plugin.WriteOptions) error { + m := make([]message.Message, len(msgs)) for i, msg := range msgs { m[i] = msg } @@ -25,8 +26,8 @@ func (c *testMixedBatchClient) MigrateTableBatch(ctx context.Context, msgs []*pl return nil } -func (c *testMixedBatchClient) InsertBatch(ctx context.Context, msgs []*plugin.MessageInsert, options plugin.WriteOptions) error { - m := make([]plugin.Message, len(msgs)) +func (c *testMixedBatchClient) InsertBatch(ctx context.Context, msgs []*message.Insert, options plugin.WriteOptions) error { + m := make([]message.Message, len(msgs)) for i, msg := range msgs { m[i] = msg } @@ -34,8 +35,8 @@ func (c *testMixedBatchClient) InsertBatch(ctx context.Context, msgs []*plugin.M return nil } -func (c *testMixedBatchClient) DeleteStaleBatch(ctx context.Context, msgs []*plugin.MessageDeleteStale, options plugin.WriteOptions) error { - m := make([]plugin.Message, len(msgs)) +func (c *testMixedBatchClient) DeleteStaleBatch(ctx context.Context, msgs []*message.DeleteStale, options plugin.WriteOptions) error { + m := make([]message.Message, len(msgs)) for i, msg := range msgs { m[i] = msg } @@ -58,7 +59,7 @@ func TestMixedBatchWriter(t *testing.T) { }, }, } - msgMigrateTable1 := &plugin.MessageMigrateTable{ + msgMigrateTable1 := &message.MigrateTable{ Table: table1, } @@ -72,7 +73,7 @@ func TestMixedBatchWriter(t *testing.T) { }, }, } - msgMigrateTable2 := &plugin.MessageMigrateTable{ + msgMigrateTable2 := &message.MigrateTable{ Table: table2, } @@ -80,7 +81,7 @@ func TestMixedBatchWriter(t *testing.T) { bldr1 := array.NewRecordBuilder(memory.DefaultAllocator, table1.ToArrowSchema()) bldr1.Field(0).(*array.Int64Builder).Append(1) rec1 := bldr1.NewRecord() - msgInsertTable1 := &plugin.MessageInsert{ + msgInsertTable1 := &message.Insert{ Record: rec1, } @@ -88,18 +89,18 @@ func TestMixedBatchWriter(t *testing.T) { bldr2 := array.NewRecordBuilder(memory.DefaultAllocator, table1.ToArrowSchema()) bldr2.Field(0).(*array.Int64Builder).Append(1) rec2 := bldr2.NewRecord() - msgInsertTable2 := &plugin.MessageInsert{ + msgInsertTable2 := &message.Insert{ Record: rec2, Upsert: false, } // message to delete stale from table1 - msgDeleteStale1 := &plugin.MessageDeleteStale{ + msgDeleteStale1 := &message.DeleteStale{ Table: table1, SourceName: "my-source", SyncTime: time.Now(), } - msgDeleteStale2 := &plugin.MessageDeleteStale{ + msgDeleteStale2 := &message.DeleteStale{ Table: table1, SourceName: "my-source", SyncTime: time.Now(), @@ -107,12 +108,12 @@ func TestMixedBatchWriter(t *testing.T) { testCases := []struct { name string - messages []plugin.Message - wantBatches [][]plugin.Message + messages []message.Message + wantBatches [][]message.Message }{ { name: "create table, insert, delete stale", - messages: []plugin.Message{ + messages: []message.Message{ msgMigrateTable1, msgMigrateTable2, msgInsertTable1, @@ -120,7 +121,7 @@ func TestMixedBatchWriter(t *testing.T) { msgDeleteStale1, msgDeleteStale2, }, - wantBatches: [][]plugin.Message{ + wantBatches: [][]message.Message{ {msgMigrateTable1, msgMigrateTable2}, {msgInsertTable1, msgInsertTable2}, {msgDeleteStale1, msgDeleteStale2}, @@ -128,7 +129,7 @@ func TestMixedBatchWriter(t *testing.T) { }, { name: "interleaved messages", - messages: []plugin.Message{ + messages: []message.Message{ msgMigrateTable1, msgInsertTable1, msgDeleteStale1, @@ -136,7 +137,7 @@ func TestMixedBatchWriter(t *testing.T) { msgInsertTable2, msgDeleteStale2, }, - wantBatches: [][]plugin.Message{ + wantBatches: [][]message.Message{ {msgMigrateTable1}, {msgInsertTable1}, {msgDeleteStale1}, @@ -147,7 +148,7 @@ func TestMixedBatchWriter(t *testing.T) { }, { name: "interleaved messages", - messages: []plugin.Message{ + messages: []message.Message{ msgMigrateTable1, msgMigrateTable2, msgInsertTable1, @@ -155,7 +156,7 @@ func TestMixedBatchWriter(t *testing.T) { msgInsertTable2, msgDeleteStale1, }, - wantBatches: [][]plugin.Message{ + wantBatches: [][]message.Message{ {msgMigrateTable1, msgMigrateTable2}, {msgInsertTable1}, {msgDeleteStale2}, @@ -168,13 +169,13 @@ func TestMixedBatchWriter(t *testing.T) { for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { client := &testMixedBatchClient{ - receivedBatches: make([][]plugin.Message, 0), + receivedBatches: make([][]message.Message, 0), } wr, err := NewMixedBatchWriter(client) if err != nil { t.Fatal(err) } - ch := make(chan plugin.Message, len(tc.messages)) + ch := make(chan message.Message, len(tc.messages)) for _, msg := range tc.messages { ch <- msg } From 491f2c2159bf9a76530ec553fad82f73654a118f Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Fri, 16 Jun 2023 18:33:42 +0300 Subject: [PATCH 037/125] move to []byte --- internal/memdb/memdb.go | 9 ++++++--- internal/servers/destination/v1/destinations.go | 6 +++++- internal/servers/plugin/v3/plugin.go | 7 +------ plugin/plugin.go | 9 ++------- plugin/plugin_source.go | 4 ++-- plugin/plugin_test.go | 4 ++-- plugin/testing_sync.go | 2 +- 7 files changed, 19 insertions(+), 22 deletions(-) diff --git a/internal/memdb/memdb.go b/internal/memdb/memdb.go index 4c273ad3a4..42d2f89c16 100644 --- a/internal/memdb/memdb.go +++ b/internal/memdb/memdb.go @@ -24,6 +24,9 @@ type client struct { type Option func(*client) +type Spec struct { +} + func WithErrOnWrite() Option { return func(c *client) { c.errOnWrite = true @@ -44,12 +47,12 @@ func GetNewClient(options ...Option) plugin.NewClientFunc { for _, opt := range options { opt(c) } - return func(context.Context, zerolog.Logger, any) (plugin.Client, error) { + return func(context.Context, zerolog.Logger, []byte) (plugin.Client, error) { return c, nil } } -func NewMemDBClient(_ context.Context, _ zerolog.Logger, spec any) (plugin.Client, error) { +func NewMemDBClient(_ context.Context, _ zerolog.Logger, spec []byte) (plugin.Client, error) { return &client{ memoryDB: make(map[string][]arrow.Record), tables: make(map[string]*schema.Table), @@ -86,7 +89,7 @@ func (c *client) ID() string { } func (c *client) GetSpec() any { - return &struct{}{} + return &Spec{} } func (c *client) Read(ctx context.Context, table *schema.Table, res chan<- arrow.Record) error { diff --git a/internal/servers/destination/v1/destinations.go b/internal/servers/destination/v1/destinations.go index b3534fc56a..83f02462d9 100644 --- a/internal/servers/destination/v1/destinations.go +++ b/internal/servers/destination/v1/destinations.go @@ -35,7 +35,11 @@ func (s *Server) Configure(ctx context.Context, req *pb.Configure_Request) (*pb. return nil, status.Errorf(codes.InvalidArgument, "failed to unmarshal spec: %v", err) } s.spec = spec - return &pb.Configure_Response{}, s.Plugin.Init(ctx, s.spec.Spec) + pluginSpec, err := json.Marshal(s.spec.Spec) + if err != nil { + return nil, status.Errorf(codes.InvalidArgument, "failed to marshal spec: %v", err) + } + return &pb.Configure_Response{}, s.Plugin.Init(ctx, pluginSpec) } func (s *Server) GetName(context.Context, *pb.GetName_Request) (*pb.GetName_Response, error) { diff --git a/internal/servers/plugin/v3/plugin.go b/internal/servers/plugin/v3/plugin.go index 2a0b3ce429..77c41bea29 100644 --- a/internal/servers/plugin/v3/plugin.go +++ b/internal/servers/plugin/v3/plugin.go @@ -2,7 +2,6 @@ package plugin import ( "context" - "encoding/json" "errors" "fmt" "io" @@ -59,11 +58,7 @@ func (s *Server) GetVersion(context.Context, *pb.GetVersion_Request) (*pb.GetVer } func (s *Server) Init(ctx context.Context, req *pb.Init_Request) (*pb.Init_Response, error) { - pluginSpec := s.Plugin.GetSpec() - if err := json.Unmarshal(req.GetSpec(), &pluginSpec); err != nil { - return nil, status.Errorf(codes.InvalidArgument, "failed to unmarshal plugin spec: %v", err) - } - if err := s.Plugin.Init(ctx, pluginSpec); err != nil { + if err := s.Plugin.Init(ctx, req.Spec); err != nil { return nil, status.Errorf(codes.Internal, "failed to init plugin: %v", err) } return &pb.Init_Response{}, nil diff --git a/plugin/plugin.go b/plugin/plugin.go index d327ee8b87..81fe39dcfd 100644 --- a/plugin/plugin.go +++ b/plugin/plugin.go @@ -13,7 +13,7 @@ import ( var ErrNotImplemented = fmt.Errorf("not implemented") -type NewClientFunc func(context.Context, zerolog.Logger, any) (Client, error) +type NewClientFunc func(context.Context, zerolog.Logger, []byte) (Client, error) type Client interface { SourceClient @@ -120,13 +120,8 @@ func (p *Plugin) Tables(ctx context.Context) (schema.Tables, error) { return tables, nil } -// GetSpec returns an empty struct to be filled with the plugin's configuration. -func (p *Plugin) GetSpec() any { - return p.client.GetSpec() -} - // Init initializes the plugin with the given spec. -func (p *Plugin) Init(ctx context.Context, spec any) error { +func (p *Plugin) Init(ctx context.Context, spec []byte) error { if !p.mu.TryLock() { return fmt.Errorf("plugin already in use") } diff --git a/plugin/plugin_source.go b/plugin/plugin_source.go index c6d6089751..6b015ae7ea 100644 --- a/plugin/plugin_source.go +++ b/plugin/plugin_source.go @@ -46,8 +46,8 @@ type NewSourceClientFunc func(context.Context, zerolog.Logger, any) (SourceClien // NewSourcePlugin returns a new CloudQuery Plugin with the given name, version and implementation. // Source plugins only support read operations. For Read & Write plugin use NewPlugin. func NewSourcePlugin(name string, version string, newClient NewSourceClientFunc, options ...Option) *Plugin { - newClientWrapper := func(ctx context.Context, logger zerolog.Logger, any any) (Client, error) { - sourceClient, err := newClient(ctx, logger, any) + newClientWrapper := func(ctx context.Context, logger zerolog.Logger, spec []byte) (Client, error) { + sourceClient, err := newClient(ctx, logger, spec) if err != nil { return nil, err } diff --git a/plugin/plugin_test.go b/plugin/plugin_test.go index b96d9fc657..d6fdfcba79 100644 --- a/plugin/plugin_test.go +++ b/plugin/plugin_test.go @@ -17,7 +17,7 @@ type testPluginClient struct { messages []message.Message } -func newTestPluginClient(context.Context, zerolog.Logger, any) (Client, error) { +func newTestPluginClient(context.Context, zerolog.Logger, []byte) (Client, error) { return &testPluginClient{}, nil } @@ -52,7 +52,7 @@ func (c *testPluginClient) Close(context.Context) error { func TestPluginSuccess(t *testing.T) { ctx := context.Background() p := NewPlugin("test", "v1.0.0", newTestPluginClient) - if err := p.Init(ctx, &testPluginSpec{}); err != nil { + if err := p.Init(ctx, []byte("")); err != nil { t.Fatal(err) } tables, err := p.Tables(ctx) diff --git a/plugin/testing_sync.go b/plugin/testing_sync.go index 608b7cd653..4bea08f8c1 100644 --- a/plugin/testing_sync.go +++ b/plugin/testing_sync.go @@ -14,7 +14,7 @@ import ( type Validator func(t *testing.T, plugin *Plugin, resources []message.Message) -func TestPluginSync(t *testing.T, plugin *Plugin, spec any, options SyncOptions, opts ...TestPluginOption) { +func TestPluginSync(t *testing.T, plugin *Plugin, spec []byte, options SyncOptions, opts ...TestPluginOption) { t.Helper() o := &testPluginOptions{ From 0b9625dc69c092415976c4161ea63b35d7a890fd Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Fri, 16 Jun 2023 21:05:28 +0300 Subject: [PATCH 038/125] fix data race --- writers/batch.go | 62 +++++++++++++++-------------- writers/batch_test.go | 91 ++++++++++++++++++++++++++++++------------- 2 files changed, 97 insertions(+), 56 deletions(-) diff --git a/writers/batch.go b/writers/batch.go index 26661d864f..e4839490d3 100644 --- a/writers/batch.go +++ b/writers/batch.go @@ -13,7 +13,6 @@ import ( "github.com/cloudquery/plugin-sdk/v4/plugin" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/rs/zerolog" - "golang.org/x/sync/semaphore" ) type Writer interface { @@ -22,7 +21,6 @@ type Writer interface { const ( defaultBatchTimeoutSeconds = 20 - defaultMaxWorkers = int64(10000) defaultBatchSize = 10000 defaultBatchSizeBytes = 5 * 1024 * 1024 // 5 MiB ) @@ -35,11 +33,13 @@ type BatchWriterClient interface { type BatchWriter struct { client BatchWriterClient - semaphore *semaphore.Weighted workers map[string]*worker workersLock *sync.RWMutex workersWaitGroup *sync.WaitGroup + + migrateTableLock *sync.Mutex migrateTableMessages []*message.MigrateTable + deleteStaleLock *sync.Mutex deleteStaleMessages []*message.DeleteStale logger zerolog.Logger @@ -62,12 +62,6 @@ func WithBatchTimeout(timeout time.Duration) Option { } } -func WithMaxWorkers(n int64) Option { - return func(p *BatchWriter) { - p.semaphore = semaphore.NewWeighted(n) - } -} - func WithBatchSize(size int) Option { return func(p *BatchWriter) { p.batchSize = size @@ -82,7 +76,6 @@ func WithBatchSizeBytes(size int) Option { type worker struct { count int - wg *sync.WaitGroup ch chan *message.Insert flush chan chan bool } @@ -93,11 +86,12 @@ func NewBatchWriter(client BatchWriterClient, opts ...Option) (*BatchWriter, err workers: make(map[string]*worker), workersLock: &sync.RWMutex{}, workersWaitGroup: &sync.WaitGroup{}, + migrateTableLock: &sync.Mutex{}, + deleteStaleLock: &sync.Mutex{}, logger: zerolog.Nop(), batchTimeout: defaultBatchTimeoutSeconds * time.Second, batchSize: defaultBatchSize, batchSizeBytes: defaultBatchSizeBytes, - semaphore: semaphore.NewWeighted(defaultMaxWorkers), } for _, opt := range opts { opt(c) @@ -219,6 +213,11 @@ func (*BatchWriter) removeDuplicatesByPK(table *schema.Table, resources []arrow. } func (w *BatchWriter) flushMigrateTables(ctx context.Context) error { + w.migrateTableLock.Lock() + defer w.migrateTableLock.Unlock() + if len(w.migrateTableMessages) == 0 { + return nil + } if err := w.client.MigrateTables(ctx, w.migrateTableMessages); err != nil { return err } @@ -227,6 +226,11 @@ func (w *BatchWriter) flushMigrateTables(ctx context.Context) error { } func (w *BatchWriter) flushDeleteStaleTables(ctx context.Context) error { + w.deleteStaleLock.Lock() + defer w.deleteStaleLock.Unlock() + if len(w.deleteStaleMessages) == 0 { + return nil + } if err := w.client.DeleteStale(ctx, w.deleteStaleMessages); err != nil { return err } @@ -261,41 +265,39 @@ func (w *BatchWriter) Write(ctx context.Context, msgs <-chan message.Message) er for msg := range msgs { switch m := msg.(type) { case *message.DeleteStale: - if len(w.migrateTableMessages) > 0 { - if err := w.flushMigrateTables(ctx); err != nil { - return err - } + if err := w.flushMigrateTables(ctx); err != nil { + return err } w.flushInsert(ctx, m.Table.Name) + w.deleteStaleLock.Lock() w.deleteStaleMessages = append(w.deleteStaleMessages, m) - if len(w.deleteStaleMessages) > w.batchSize { + l := len(w.deleteStaleMessages) + w.deleteStaleLock.Unlock() + if l > w.batchSize { if err := w.flushDeleteStaleTables(ctx); err != nil { return err } } case *message.Insert: - if len(w.migrateTableMessages) > 0 { - if err := w.flushMigrateTables(ctx); err != nil { - return err - } + if err := w.flushMigrateTables(ctx); err != nil { + return err } - if len(w.deleteStaleMessages) > 0 { - if err := w.flushDeleteStaleTables(ctx); err != nil { - return err - } + if err := w.flushDeleteStaleTables(ctx); err != nil { + return err } if err := w.startWorker(ctx, m); err != nil { return err } case *message.MigrateTable: w.flushInsert(ctx, m.Table.Name) - if len(w.deleteStaleMessages) > 0 { - if err := w.flushDeleteStaleTables(ctx); err != nil { - return err - } + if err := w.flushDeleteStaleTables(ctx); err != nil { + return err } + w.migrateTableLock.Lock() w.migrateTableMessages = append(w.migrateTableMessages, m) - if len(w.migrateTableMessages) > w.batchSize { + l := len(w.migrateTableMessages) + w.migrateTableLock.Unlock() + if l > w.batchSize { if err := w.flushMigrateTables(ctx); err != nil { return err } @@ -316,7 +318,7 @@ func (w *BatchWriter) startWorker(ctx context.Context, msg *message.Insert) erro wr, ok := w.workers[tableName] w.workersLock.RUnlock() if ok { - w.workers[tableName].ch <- msg + wr.ch <- msg return nil } w.workersLock.Lock() diff --git a/writers/batch_test.go b/writers/batch_test.go index 35452ca7ee..636553d8d5 100644 --- a/writers/batch_test.go +++ b/writers/batch_test.go @@ -2,6 +2,7 @@ package writers import ( "context" + "sync" "testing" "time" @@ -13,21 +14,46 @@ import ( ) type testBatchClient struct { + mutex *sync.Mutex migrateTables []*message.MigrateTable inserts []*message.Insert deleteStales []*message.DeleteStale } +func (c *testBatchClient) MigrateTablesLen() int { + c.mutex.Lock() + defer c.mutex.Unlock() + return len(c.migrateTables) +} + +func (c *testBatchClient) InsertsLen() int { + c.mutex.Lock() + defer c.mutex.Unlock() + return len(c.inserts) +} + +func (c *testBatchClient) DeleteStalesLen() int { + c.mutex.Lock() + defer c.mutex.Unlock() + return len(c.deleteStales) +} + func (c *testBatchClient) MigrateTables(_ context.Context, msgs []*message.MigrateTable) error { + c.mutex.Lock() + defer c.mutex.Unlock() c.migrateTables = append(c.migrateTables, msgs...) return nil } func (c *testBatchClient) WriteTableBatch(_ context.Context, _ string, _ bool, msgs []*message.Insert) error { + c.mutex.Lock() + defer c.mutex.Unlock() c.inserts = append(c.inserts, msgs...) return nil } func (c *testBatchClient) DeleteStale(_ context.Context, msgs []*message.DeleteStale) error { + c.mutex.Lock() + defer c.mutex.Unlock() c.deleteStales = append(c.deleteStales, msgs...) return nil } @@ -58,7 +84,9 @@ var batchTestTables = schema.Tables{ func TestBatchFlushDifferentMessages(t *testing.T) { ctx := context.Background() - testClient := &testBatchClient{} + testClient := &testBatchClient{ + mutex: &sync.Mutex{}, + } wr, err := NewBatchWriter(testClient) if err != nil { t.Fatal(err) @@ -70,33 +98,40 @@ func TestBatchFlushDifferentMessages(t *testing.T) { if err := wr.writeAll(ctx, []message.Message{&message.MigrateTable{Table: batchTestTables[0]}}); err != nil { t.Fatal(err) } - if len(testClient.migrateTables) != 0 { - t.Fatalf("expected 0 create table messages, got %d", len(testClient.migrateTables)) + + if testClient.MigrateTablesLen() != 0 { + t.Fatalf("expected 0 create table messages, got %d", testClient.MigrateTablesLen()) } + if err := wr.writeAll(ctx, []message.Message{&message.Insert{Record: record}}); err != nil { t.Fatal(err) } - if len(testClient.migrateTables) != 1 { - t.Fatalf("expected 1 create table messages, got %d", len(testClient.migrateTables)) + + if testClient.MigrateTablesLen() != 1 { + t.Fatalf("expected 1 migrate table messages, got %d", testClient.MigrateTablesLen()) } - if len(testClient.inserts) != 0 { - t.Fatalf("expected 0 insert messages, got %d", len(testClient.inserts)) + if testClient.InsertsLen() != 0 { + t.Fatalf("expected 0 insert messages, got %d", testClient.InsertsLen()) } + if err := wr.writeAll(ctx, []message.Message{&message.MigrateTable{Table: batchTestTables[0]}}); err != nil { t.Fatal(err) } - if len(testClient.inserts) != 1 { - t.Fatalf("expected 1 insert messages, got %d", len(testClient.inserts)) + if testClient.InsertsLen() != 1 { + t.Fatalf("expected 1 insert messages, got %d", testClient.InsertsLen()) } + } func TestBatchSize(t *testing.T) { ctx := context.Background() - testClient := &testBatchClient{} + testClient := &testBatchClient{ + mutex: &sync.Mutex{}, + } wr, err := NewBatchWriter(testClient, WithBatchSize(2)) if err != nil { t.Fatal(err) @@ -109,8 +144,8 @@ func TestBatchSize(t *testing.T) { t.Fatal(err) } - if len(testClient.inserts) != 0 { - t.Fatalf("expected 0 create table messages, got %d", len(testClient.inserts)) + if testClient.InsertsLen() != 0 { + t.Fatalf("expected 0 insert messages, got %d", testClient.InsertsLen()) } if err := wr.writeAll(ctx, []message.Message{&message.Insert{ @@ -121,15 +156,17 @@ func TestBatchSize(t *testing.T) { // we need to wait for the batch to be flushed time.Sleep(time.Second * 2) - if len(testClient.inserts) != 2 { - t.Fatalf("expected 2 create table messages, got %d", len(testClient.inserts)) + if testClient.InsertsLen() != 2 { + t.Fatalf("expected 2 insert messages, got %d", testClient.InsertsLen()) } } func TestBatchTimeout(t *testing.T) { ctx := context.Background() - testClient := &testBatchClient{} + testClient := &testBatchClient{ + mutex: &sync.Mutex{}, + } wr, err := NewBatchWriter(testClient, WithBatchTimeout(time.Second)) if err != nil { t.Fatal(err) @@ -142,29 +179,31 @@ func TestBatchTimeout(t *testing.T) { t.Fatal(err) } - if len(testClient.inserts) != 0 { - t.Fatalf("expected 0 create table messages, got %d", len(testClient.inserts)) + if testClient.InsertsLen() != 0 { + t.Fatalf("expected 0 insert messages, got %d", testClient.InsertsLen()) } // we need to wait for the batch to be flushed time.Sleep(time.Millisecond * 250) - if len(testClient.inserts) != 0 { - t.Fatalf("expected 0 create table messages, got %d", len(testClient.inserts)) + if testClient.InsertsLen() != 0 { + t.Fatalf("expected 0 insert messages, got %d", testClient.InsertsLen()) } // we need to wait for the batch to be flushed time.Sleep(time.Second * 1) - if len(testClient.inserts) != 1 { - t.Fatalf("expected 1 create table messages, got %d", len(testClient.inserts)) + if testClient.InsertsLen() != 1 { + t.Fatalf("expected 1 insert messages, got %d", testClient.InsertsLen()) } } func TestBatchUpserts(t *testing.T) { ctx := context.Background() - testClient := &testBatchClient{} + testClient := &testBatchClient{ + mutex: &sync.Mutex{}, + } wr, err := NewBatchWriter(testClient) if err != nil { t.Fatal(err) @@ -178,8 +217,8 @@ func TestBatchUpserts(t *testing.T) { t.Fatal(err) } - if len(testClient.inserts) != 0 { - t.Fatalf("expected 0 create table messages, got %d", len(testClient.inserts)) + if testClient.InsertsLen() != 0 { + t.Fatalf("expected 0 insert messages, got %d", testClient.InsertsLen()) } if err := wr.writeAll(ctx, []message.Message{&message.Insert{ @@ -190,7 +229,7 @@ func TestBatchUpserts(t *testing.T) { // we need to wait for the batch to be flushed time.Sleep(time.Second * 2) - if len(testClient.inserts) != 1 { - t.Fatalf("expected 1 create table messages, got %d", len(testClient.inserts)) + if testClient.InsertsLen() != 1 { + t.Fatalf("expected 1 insert messages, got %d", testClient.InsertsLen()) } } From 5df21755c75035b0e9504f0589d0976adff36d5e Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Fri, 16 Jun 2023 21:59:45 +0300 Subject: [PATCH 039/125] tests passing --- ...nation_v0_test.go.backup => destination_v0_test.go} | 9 +++++---- ...nation_v1_test.go.backup => destination_v1_test.go} | 1 + serve/plugin.go | 8 ++++---- serve/plugin_test.go | 7 ++++--- writers/batch.go | 10 +++++----- writers/batch_test.go | 3 +-- 6 files changed, 20 insertions(+), 18 deletions(-) rename serve/{destination_v0_test.go.backup => destination_v0_test.go} (95%) rename serve/{destination_v1_test.go.backup => destination_v1_test.go} (98%) diff --git a/serve/destination_v0_test.go.backup b/serve/destination_v0_test.go similarity index 95% rename from serve/destination_v0_test.go.backup rename to serve/destination_v0_test.go index 6c2ca95965..150cbc29fb 100644 --- a/serve/destination_v0_test.go.backup +++ b/serve/destination_v0_test.go @@ -17,6 +17,7 @@ import ( "github.com/cloudquery/plugin-sdk/v4/internal/deprecated" "github.com/cloudquery/plugin-sdk/v4/internal/memdb" serversDestination "github.com/cloudquery/plugin-sdk/v4/internal/servers/destination/v0" + "github.com/cloudquery/plugin-sdk/v4/message" "github.com/cloudquery/plugin-sdk/v4/plugin" "google.golang.org/grpc" "google.golang.org/grpc/credentials/insecure" @@ -41,7 +42,7 @@ func TestDestination(t *testing.T) { }() // https://stackoverflow.com/questions/42102496/testing-a-grpc-service - conn, err := grpc.DialContext(ctx, "bufnet", grpc.WithContextDialer(srv.bufPluginDialer), grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithBlock()) + conn, err := grpc.DialContext(ctx, "bufnet1", grpc.WithContextDialer(srv.bufPluginDialer), grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithBlock()) if err != nil { t.Fatalf("Failed to dial bufnet: %v", err) } @@ -53,10 +54,10 @@ func TestDestination(t *testing.T) { if err != nil { t.Fatal(err) } + if _, err := c.Configure(ctx, &pbBase.Configure_Request{Config: specBytes}); err != nil { t.Fatal(err) } - getNameRes, err := c.GetName(ctx, &pbBase.GetName_Request{}) if err != nil { t.Fatal(err) @@ -117,7 +118,6 @@ func TestDestination(t *testing.T) { }); err != nil { t.Fatal(err) } - if err := writeClient.Send(&pb.Write2_Request{ Resource: destResourceBytes, }); err != nil { @@ -127,6 +127,7 @@ func TestDestination(t *testing.T) { if _, err := writeClient.CloseAndRecv(); err != nil { t.Fatal(err) } + // serversDestination table := serversDestination.TableV2ToV3(tableV2) msgs, err := p.SyncAll(ctx, plugin.SyncOptions{ @@ -148,6 +149,7 @@ func TestDestination(t *testing.T) { if totalResources != 1 { t.Fatalf("expected 1 resource but got %d", totalResources) } + if _, err := c.DeleteStale(ctx, &pb.DeleteStale_Request{ Source: "testSource", Timestamp: timestamppb.New(time.Now().Truncate(time.Microsecond)), @@ -164,7 +166,6 @@ func TestDestination(t *testing.T) { if _, err := c.Close(ctx, &pb.Close_Request{}); err != nil { t.Fatalf("failed to call Close: %v", err) } - cancel() wg.Wait() if serverErr != nil { diff --git a/serve/destination_v1_test.go.backup b/serve/destination_v1_test.go similarity index 98% rename from serve/destination_v1_test.go.backup rename to serve/destination_v1_test.go index d12aea4db1..11e1ab738c 100644 --- a/serve/destination_v1_test.go.backup +++ b/serve/destination_v1_test.go @@ -13,6 +13,7 @@ import ( pb "github.com/cloudquery/plugin-pb-go/pb/destination/v1" "github.com/cloudquery/plugin-pb-go/specs/v0" "github.com/cloudquery/plugin-sdk/v4/internal/memdb" + "github.com/cloudquery/plugin-sdk/v4/message" "github.com/cloudquery/plugin-sdk/v4/plugin" "github.com/cloudquery/plugin-sdk/v4/schema" "google.golang.org/grpc" diff --git a/serve/plugin.go b/serve/plugin.go index 9c55830987..e18a745a37 100644 --- a/serve/plugin.go +++ b/serve/plugin.go @@ -30,7 +30,6 @@ import ( "github.com/rs/zerolog/log" "github.com/spf13/cobra" "github.com/thoas/go-funk" - "golang.org/x/net/netutil" "google.golang.org/grpc" "google.golang.org/grpc/test/bufconn" ) @@ -96,6 +95,7 @@ func (s *PluginServe) Serve(ctx context.Context) error { if err := types.RegisterAllExtensions(); err != nil { return err } + defer types.UnregisterAllExtensions() cmd := s.newCmdPluginRoot() if s.args != nil { cmd.SetArgs(s.args) @@ -132,7 +132,6 @@ func (s *PluginServe) newCmdPluginServe() *cobra.Command { } else { logger = log.Output(zerolog.ConsoleWriter{Out: os.Stdout}).Level(zerologLevel) } - // opts.Plugin.Logger = logger var listener net.Listener if s.testListener { @@ -143,9 +142,10 @@ func (s *PluginServe) newCmdPluginServe() *cobra.Command { return fmt.Errorf("failed to listen %s:%s: %w", network, address, err) } } + defer listener.Close() // source plugins can only accept one connection at a time // unlike destination plugins that can accept multiple connections - limitListener := netutil.LimitListener(listener, 1) + // limitListener := netutil.LimitListener(listener, 1) // See logging pattern https://github.com/grpc-ecosystem/go-grpc-middleware/blob/v2/providers/zerolog/examples_test.go grpcServer := grpc.NewServer( grpc.ChainUnaryInterceptor( @@ -226,7 +226,7 @@ func (s *PluginServe) newCmdPluginServe() *cobra.Command { }() logger.Info().Str("address", listener.Addr().String()).Msg("Source plugin server listening") - if err := grpcServer.Serve(limitListener); err != nil { + if err := grpcServer.Serve(listener); err != nil { return fmt.Errorf("failed to serve: %w", err) } return nil diff --git a/serve/plugin_test.go b/serve/plugin_test.go index e61555a2fc..39e48c808b 100644 --- a/serve/plugin_test.go +++ b/serve/plugin_test.go @@ -21,7 +21,7 @@ import ( func TestPluginServe(t *testing.T) { p := plugin.NewPlugin( - "testPlugin", + "testPluginV3", "v1.0.0", memdb.NewMemDBClient) srv := Plugin(p, WithArgs("serve"), WithTestListener()) @@ -44,14 +44,15 @@ func TestPluginServe(t *testing.T) { if err != nil { t.Fatalf("Failed to dial bufnet: %v", err) } + c := pb.NewPluginClient(conn) getNameRes, err := c.GetName(ctx, &pb.GetName_Request{}) if err != nil { t.Fatal(err) } - if getNameRes.Name != "testPlugin" { - t.Fatalf("expected name to be testPlugin but got %s", getNameRes.Name) + if getNameRes.Name != "testPluginV3" { + t.Fatalf("expected name to be testPluginV3 but got %s", getNameRes.Name) } getVersionResponse, err := c.GetVersion(ctx, &pb.GetVersion_Request{}) diff --git a/writers/batch.go b/writers/batch.go index e4839490d3..67643bc53f 100644 --- a/writers/batch.go +++ b/writers/batch.go @@ -32,12 +32,12 @@ type BatchWriterClient interface { } type BatchWriter struct { - client BatchWriterClient - workers map[string]*worker - workersLock *sync.RWMutex - workersWaitGroup *sync.WaitGroup + client BatchWriterClient + workers map[string]*worker + workersLock *sync.RWMutex + workersWaitGroup *sync.WaitGroup - migrateTableLock *sync.Mutex + migrateTableLock *sync.Mutex migrateTableMessages []*message.MigrateTable deleteStaleLock *sync.Mutex deleteStaleMessages []*message.DeleteStale diff --git a/writers/batch_test.go b/writers/batch_test.go index 636553d8d5..e523f1decb 100644 --- a/writers/batch_test.go +++ b/writers/batch_test.go @@ -14,7 +14,7 @@ import ( ) type testBatchClient struct { - mutex *sync.Mutex + mutex *sync.Mutex migrateTables []*message.MigrateTable inserts []*message.Insert deleteStales []*message.DeleteStale @@ -115,7 +115,6 @@ func TestBatchFlushDifferentMessages(t *testing.T) { t.Fatalf("expected 0 insert messages, got %d", testClient.InsertsLen()) } - if err := wr.writeAll(ctx, []message.Message{&message.MigrateTable{Table: batchTestTables[0]}}); err != nil { t.Fatal(err) } From ca4de2c912323ddd575dbd9344254e5aa4d21a99 Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Fri, 16 Jun 2023 22:34:52 +0300 Subject: [PATCH 040/125] Remove GetSpec --- plugin/plugin_destination.go | 1 - plugin/plugin_source.go | 1 - 2 files changed, 2 deletions(-) diff --git a/plugin/plugin_destination.go b/plugin/plugin_destination.go index 2904b2a9a1..68890bbd40 100644 --- a/plugin/plugin_destination.go +++ b/plugin/plugin_destination.go @@ -13,7 +13,6 @@ type WriteOptions struct { } type DestinationClient interface { - GetSpec() any Close(ctx context.Context) error Read(ctx context.Context, table *schema.Table, res chan<- arrow.Record) error Write(ctx context.Context, options WriteOptions, res <-chan message.Message) error diff --git a/plugin/plugin_source.go b/plugin/plugin_source.go index 6b015ae7ea..d6bf744ac7 100644 --- a/plugin/plugin_source.go +++ b/plugin/plugin_source.go @@ -21,7 +21,6 @@ type SyncOptions struct { } type SourceClient interface { - GetSpec() any Close(ctx context.Context) error Tables(ctx context.Context) (schema.Tables, error) Sync(ctx context.Context, options SyncOptions, res chan<- message.Message) error From 6dd7795638a76c801ad6de1087fc1ada3c2e574a Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Fri, 16 Jun 2023 22:58:04 +0300 Subject: [PATCH 041/125] fix destination v0 --- internal/servers/destination/v0/destinations.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/internal/servers/destination/v0/destinations.go b/internal/servers/destination/v0/destinations.go index af89f7de26..8e13b14531 100644 --- a/internal/servers/destination/v0/destinations.go +++ b/internal/servers/destination/v0/destinations.go @@ -40,7 +40,11 @@ func (s *Server) Configure(ctx context.Context, req *pbBase.Configure_Request) ( return nil, status.Errorf(codes.InvalidArgument, "failed to unmarshal spec: %v", err) } s.spec = spec - return &pbBase.Configure_Response{}, s.Plugin.Init(ctx, nil) + pluginSpec, err := json.Marshal(s.spec.Spec) + if err != nil { + return nil, status.Errorf(codes.InvalidArgument, "failed to marshal spec: %v", err) + } + return &pbBase.Configure_Response{}, s.Plugin.Init(ctx, pluginSpec) } func (s *Server) GetName(context.Context, *pbBase.GetName_Request) (*pbBase.GetName_Response, error) { From 16753fdc128ad5b835d3859f027dfe7b75a354a7 Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Sat, 17 Jun 2023 12:40:31 +0300 Subject: [PATCH 042/125] more wip --- internal/servers/plugin/v3/plugin.go | 5 +- message/message.go | 28 +++++- plugin/plugin_source.go | 1 - plugin/testing_sync.go | 135 --------------------------- scheduler/scheduler.go | 42 +++++++-- scheduler/scheduler_test.go | 4 +- schema/validators.go | 27 ++++++ writers/mixed_batch.go | 6 +- 8 files changed, 96 insertions(+), 152 deletions(-) diff --git a/internal/servers/plugin/v3/plugin.go b/internal/servers/plugin/v3/plugin.go index 77c41bea29..788aaec743 100644 --- a/internal/servers/plugin/v3/plugin.go +++ b/internal/servers/plugin/v3/plugin.go @@ -70,9 +70,8 @@ func (s *Server) Sync(req *pb.Sync_Request, stream pb.Plugin_SyncServer) error { ctx := stream.Context() syncOptions := plugin.SyncOptions{ - Tables: req.Tables, - SkipTables: req.SkipTables, - Concurrency: req.Concurrency, + Tables: req.Tables, + SkipTables: req.SkipTables, } if req.StateBackend != nil { diff --git a/message/message.go b/message/message.go index f30f5a4308..e70ced4dbd 100644 --- a/message/message.go +++ b/message/message.go @@ -24,7 +24,7 @@ type Insert struct { Upsert bool } -func (m Insert) GetTable() *schema.Table { +func (m *Insert) GetTable() *schema.Table { table, err := schema.NewTableFromArrowSchema(m.Record.Schema()) if err != nil { panic(err) @@ -63,6 +63,17 @@ func (messages Messages) InsertItems() int64 { return items } +func (messages Messages) InsertMessage() Inserts { + inserts := []*Insert{} + for _, msg := range messages { + switch m := msg.(type) { + case *Insert: + inserts = append(inserts, m) + } + } + return inserts +} + func (m MigrateTables) Exists(tableName string) bool { for _, table := range m { if table.Table.Name == tableName { @@ -85,3 +96,18 @@ func (m Inserts) Exists(tableName string) bool { } return false } + +func (m Inserts) GetRecordsForTable(table *schema.Table) []arrow.Record { + res := []arrow.Record{} + for _, insert := range m { + md := insert.Record.Schema().Metadata() + tableNameMeta, ok := md.GetValue(schema.MetadataTableName) + if !ok { + continue + } + if tableNameMeta == table.Name { + res = append(res, insert.Record) + } + } + return res +} diff --git a/plugin/plugin_source.go b/plugin/plugin_source.go index d6bf744ac7..118bece1d8 100644 --- a/plugin/plugin_source.go +++ b/plugin/plugin_source.go @@ -15,7 +15,6 @@ import ( type SyncOptions struct { Tables []string SkipTables []string - Concurrency int64 DeterministicCQID bool StateBackend state.Client } diff --git a/plugin/testing_sync.go b/plugin/testing_sync.go index 4bea08f8c1..edf045f055 100644 --- a/plugin/testing_sync.go +++ b/plugin/testing_sync.go @@ -1,148 +1,13 @@ package plugin import ( - "context" "fmt" "strings" - "testing" "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/array" - "github.com/cloudquery/plugin-sdk/v4/message" - "github.com/cloudquery/plugin-sdk/v4/schema" ) -type Validator func(t *testing.T, plugin *Plugin, resources []message.Message) - -func TestPluginSync(t *testing.T, plugin *Plugin, spec []byte, options SyncOptions, opts ...TestPluginOption) { - t.Helper() - - o := &testPluginOptions{ - parallel: true, - validators: []Validator{validatePlugin}, - } - for _, opt := range opts { - opt(o) - } - if o.parallel { - t.Parallel() - } - - resourcesChannel := make(chan message.Message) - var syncErr error - - if err := plugin.Init(context.Background(), spec); err != nil { - t.Fatal(err) - } - - go func() { - defer close(resourcesChannel) - syncErr = plugin.Sync(context.Background(), options, resourcesChannel) - }() - - syncedResources := make([]message.Message, 0) - for resource := range resourcesChannel { - syncedResources = append(syncedResources, resource) - } - if syncErr != nil { - t.Fatal(syncErr) - } - for _, validator := range o.validators { - validator(t, plugin, syncedResources) - } -} - -type TestPluginOption func(*testPluginOptions) - -func WithTestPluginNoParallel() TestPluginOption { - return func(f *testPluginOptions) { - f.parallel = false - } -} - -func WithTestPluginAdditionalValidators(v Validator) TestPluginOption { - return func(f *testPluginOptions) { - f.validators = append(f.validators, v) - } -} - -type testPluginOptions struct { - parallel bool - validators []Validator -} - -func getTableResources(t *testing.T, table *schema.Table, messages []message.Message) []arrow.Record { - t.Helper() - - tableResources := make([]arrow.Record, 0) - for _, msg := range messages { - switch v := msg.(type) { - case *message.Insert: - md := v.Record.Schema().Metadata() - tableName, ok := md.GetValue(schema.MetadataTableName) - if !ok { - t.Errorf("Expected table name to be set in metadata") - } - if tableName == table.Name { - tableResources = append(tableResources, v.Record) - } - default: - t.Errorf("Unexpected message type %T", v) - } - } - - return tableResources -} - -func validateTable(t *testing.T, table *schema.Table, messages []message.Message) { - t.Helper() - tableResources := getTableResources(t, table, messages) - if len(tableResources) == 0 { - t.Errorf("Expected table %s to be synced but it was not found", table.Name) - return - } - validateResources(t, table, tableResources) -} - -func validatePlugin(t *testing.T, plugin *Plugin, resources []message.Message) { - t.Helper() - tables, err := plugin.Tables(context.Background()) - if err != nil { - t.Fatal(err) - } - for _, table := range tables.FlattenTables() { - validateTable(t, table, resources) - } -} - -// Validates that every column has at least one non-nil value. -// Also does some additional validations. -func validateResources(t *testing.T, table *schema.Table, resources []arrow.Record) { - t.Helper() - - // A set of column-names that have values in at least one of the resources. - columnsWithValues := make([]bool, len(table.Columns)) - - for _, resource := range resources { - for _, arr := range resource.Columns() { - for i := 0; i < arr.Len(); i++ { - if arr.IsValid(i) { - columnsWithValues[i] = true - } - } - } - } - - // Make sure every column has at least one value. - for i, hasValue := range columnsWithValues { - col := table.Columns[i] - emptyExpected := col.Name == "_cq_parent_id" && table.Parent == nil - if !hasValue && !emptyExpected && !col.IgnoreInTests { - t.Errorf("table: %s column %s has no values", table.Name, table.Columns[i].Name) - } - } -} - func RecordDiff(l arrow.Record, r arrow.Record) string { var sb strings.Builder if l.NumCols() != r.NumCols() { diff --git a/scheduler/scheduler.go b/scheduler/scheduler.go index 9d53abbb15..d1dc149804 100644 --- a/scheduler/scheduler.go +++ b/scheduler/scheduler.go @@ -25,6 +25,7 @@ const ( minTableConcurrency = 1 minResourceConcurrency = 100 defaultConcurrency = 200000 + defaultMaxDepth = 4 ) type Strategy int @@ -77,6 +78,12 @@ func WithConcurrency(concurrency uint64) Option { } } +func WithMaxDepth(maxDepth uint64) Option { + return func(s *Scheduler) { + s.maxDepth = maxDepth + } +} + func WithSchedulerStrategy(strategy Strategy) Option { return func(s *Scheduler) { s.strategy = strategy @@ -105,25 +112,46 @@ type Scheduler struct { concurrency uint64 } -func NewScheduler(tables schema.Tables, client schema.ClientMeta, opts ...Option) *Scheduler { +func NewScheduler(client schema.ClientMeta, opts ...Option) *Scheduler { s := Scheduler{ - tables: tables, client: client, metrics: &Metrics{TableClient: make(map[string]map[string]*TableClientMetrics)}, caser: caser.New(), concurrency: defaultConcurrency, - maxDepth: maxDepth(tables), + maxDepth: defaultMaxDepth, } for _, opt := range opts { opt(&s) } - if s.maxDepth > 3 { - panic(fmt.Errorf("max depth of %d is not supported for scheduler", s.maxDepth)) - } return &s } -func (s *Scheduler) Sync(ctx context.Context, res chan<- message.Message) error { +// SyncAll is mostly used for testing as it will sync all tables and can run out of memory +// in the real world. Should use Sync for production. +func (s *Scheduler) SyncAll(ctx context.Context, tables schema.Tables) (message.Messages, error) { + res := make(chan message.Message) + go func() { + defer close(res) + s.Sync(ctx, tables, res) + }() + var messages []message.Message + for msg := range res { + messages = append(messages, msg) + } + return messages, nil +} + +func (s *Scheduler) Sync(ctx context.Context, tables schema.Tables, res chan<- message.Message) error { + + if len(tables) == 0 { + return nil + } + + if maxDepth(tables) > s.maxDepth { + return fmt.Errorf("max depth exceeded, max depth is %d", s.maxDepth) + } + s.tables = tables + resources := make(chan *schema.Resource) go func() { defer close(resources) diff --git a/scheduler/scheduler_test.go b/scheduler/scheduler_test.go index fa5aa9669f..ee6e55b801 100644 --- a/scheduler/scheduler_test.go +++ b/scheduler/scheduler_test.go @@ -229,9 +229,9 @@ func testSyncTable(t *testing.T, tc syncTestCase, strategy Strategy, determinist WithSchedulerStrategy(strategy), WithDeterministicCQId(deterministicCQID), } - sc := NewScheduler(tables, &c, opts...) + sc := NewScheduler(&c, opts...) msgs := make(chan message.Message, 10) - if err := sc.Sync(ctx, msgs); err != nil { + if err := sc.Sync(ctx, tables, msgs); err != nil { t.Fatal(err) } close(msgs) diff --git a/schema/validators.go b/schema/validators.go index b42f59e223..6116e861a1 100644 --- a/schema/validators.go +++ b/schema/validators.go @@ -3,6 +3,8 @@ package schema import ( "errors" "fmt" + + "github.com/apache/arrow/go/v13/arrow" ) type TableValidator interface { @@ -53,3 +55,28 @@ func validateTableAttributesNameLength(t *Table) error { func (LengthTableValidator) Validate(t *Table) error { return validateTableAttributesNameLength(t) } + +func FindEmptyColumns(table *Table, records []arrow.Record) []string { + columnsWithValues := make([]bool, len(table.Columns)) + emptyColumns := make([]string, 0) + + for _, resource := range records { + for colIndex, arr := range resource.Columns() { + for i := 0; i < arr.Len(); i++ { + if arr.IsValid(i) { + columnsWithValues[colIndex] = true + } + } + } + } + + // Make sure every column has at least one value. + for i, hasValue := range columnsWithValues { + col := table.Columns[i] + emptyExpected := col.Name == "_cq_parent_id" && table.Parent == nil + if !hasValue && !emptyExpected && !col.IgnoreInTests { + emptyColumns = append(emptyColumns, col.Name) + } + } + return emptyColumns +} diff --git a/writers/mixed_batch.go b/writers/mixed_batch.go index f6704cf488..1ae699a654 100644 --- a/writers/mixed_batch.go +++ b/writers/mixed_batch.go @@ -81,11 +81,11 @@ func NewMixedBatchWriter(client MixedBatchClient, opts ...MixedBatchWriterOption func msgID(msg message.Message) int { switch msg.(type) { - case message.MigrateTable, *message.MigrateTable: + case *message.MigrateTable: return msgTypeMigrateTable - case message.Insert, *message.Insert: + case *message.Insert: return msgTypeInsert - case message.DeleteStale, *message.DeleteStale: + case *message.DeleteStale: return msgTypeDeleteStale } panic("unknown message type: " + reflect.TypeOf(msg).Name()) From 72f0029157ef36b340f6815348d9557806535da6 Mon Sep 17 00:00:00 2001 From: Kemal Hadimli Date: Tue, 20 Jun 2023 20:17:35 +0100 Subject: [PATCH 043/125] discovery versions: int32 instead of uint64 as is such in the proto --- internal/servers/discovery/v1/discovery.go | 2 +- serve/plugin.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/servers/discovery/v1/discovery.go b/internal/servers/discovery/v1/discovery.go index 47c0197ec7..896e8a9cea 100644 --- a/internal/servers/discovery/v1/discovery.go +++ b/internal/servers/discovery/v1/discovery.go @@ -8,7 +8,7 @@ import ( type Server struct { pb.UnimplementedDiscoveryServer - Versions []uint64 + Versions []int32 } func (s *Server) GetVersions(context.Context, *pb.GetVersions_Request) (*pb.GetVersions_Response, error) { diff --git a/serve/plugin.go b/serve/plugin.go index e18a745a37..94466733db 100644 --- a/serve/plugin.go +++ b/serve/plugin.go @@ -177,7 +177,7 @@ func (s *PluginServe) newCmdPluginServe() *cobra.Command { Versions: []string{"v0", "v1", "v2", "v3"}, }) pbdiscoveryv1.RegisterDiscoveryServer(grpcServer, &discoveryServerV1.Server{ - Versions: []uint64{0, 1, 2, 3}, + Versions: []int32{0, 1, 2, 3}, }) version := s.plugin.Version() From cae94eeb4cb42d785eb4860ed91530581fff5875 Mon Sep 17 00:00:00 2001 From: Herman Schaaf Date: Wed, 21 Jun 2023 17:20:37 +0100 Subject: [PATCH 044/125] Add test for NewRecordFromBytes --- schema/arrow_test.go | 59 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/schema/arrow_test.go b/schema/arrow_test.go index 377cc5718f..bfe898bc19 100644 --- a/schema/arrow_test.go +++ b/schema/arrow_test.go @@ -1,9 +1,13 @@ package schema import ( + "fmt" + "strings" "testing" "github.com/apache/arrow/go/v13/arrow" + "github.com/apache/arrow/go/v13/arrow/array" + "github.com/apache/arrow/go/v13/arrow/memory" ) func TestSchemaEncode(t *testing.T) { @@ -42,3 +46,58 @@ func TestSchemaEncode(t *testing.T) { } } } + +func TestRecordToBytesAndNewRecordFromBytes(t *testing.T) { + md := arrow.NewMetadata([]string{"key"}, []string{"value"}) + schema := arrow.NewSchema( + []arrow.Field{ + {Name: "id", Type: arrow.PrimitiveTypes.Int64}, + {Name: "name", Type: arrow.BinaryTypes.String}, + }, + &md, + ) + bldr := array.NewRecordBuilder(memory.DefaultAllocator, schema) + defer bldr.Release() + bldr.Field(0).AppendValueFromString("1") + bldr.Field(1).AppendValueFromString("foo") + record := bldr.NewRecord() + b, err := RecordToBytes(record) + if err != nil { + t.Fatal(err) + } + decodedRecord, err := NewRecordFromBytes(b) + if err != nil { + t.Fatal(err) + } + numRows := record.NumRows() + if numRows != 1 { + t.Fatalf("expected 1 row, got %d", numRows) + } + if diff := RecordDiff(record, decodedRecord); diff != "" { + t.Fatalf("record differs from expected after NewRecordFromBytes: %v", diff) + } +} + +func RecordDiff(l arrow.Record, r arrow.Record) string { + var sb strings.Builder + if l.NumCols() != r.NumCols() { + return fmt.Sprintf("different number of columns: %d vs %d", l.NumCols(), r.NumCols()) + } + if l.NumRows() != r.NumRows() { + return fmt.Sprintf("different number of rows: %d vs %d", l.NumRows(), r.NumRows()) + } + for i := 0; i < int(l.NumCols()); i++ { + edits, err := array.Diff(l.Column(i), r.Column(i)) + if err != nil { + panic(fmt.Sprintf("left: %v, right: %v, error: %v", l.Column(i).DataType(), r.Column(i).DataType(), err)) + } + diff := edits.UnifiedDiff(l.Column(i), r.Column(i)) + if diff != "" { + sb.WriteString(l.Schema().Field(i).Name) + sb.WriteString(": ") + sb.WriteString(diff) + sb.WriteString("\n") + } + } + return sb.String() +} From 3167a5652be0f4db754d0efa2cb2408548504c50 Mon Sep 17 00:00:00 2001 From: Herman Schaaf Date: Thu, 22 Jun 2023 14:44:08 +0100 Subject: [PATCH 045/125] Add better migration support --- go.mod | 1 + .../servers/destination/v0/destinations.go | 2 +- .../servers/destination/v1/destinations.go | 2 +- internal/servers/discovery/v1/discovery.go | 6 ++++- internal/servers/plugin/v3/plugin.go | 11 +++++++--- scheduler/scheduler.go | 8 ++++++- schema/arrow.go | 13 +++++++++-- schema/arrow_test.go | 22 +++++++++++++++++++ schema/table.go | 2 +- serve/destination_v0_test.go | 2 +- serve/destination_v1_test.go | 2 +- 11 files changed, 59 insertions(+), 12 deletions(-) diff --git a/go.mod b/go.mod index 985ded470a..c83cabbd5a 100644 --- a/go.mod +++ b/go.mod @@ -31,6 +31,7 @@ replace github.com/apache/arrow/go/v13 => github.com/cloudquery/arrow/go/v13 v13 replace github.com/apache/arrow/go/v13 => github.com/cloudquery/arrow/go/v13 v13.0.0-20230610001216-0f7bd3beda2c >>>>>>> 7e5547e (more wip) +replace github.com/cloudquery/plugin-pb-go => ../plugin-pb-go replace github.com/cloudquery/plugin-pb-go => ../plugin-pb-go require ( diff --git a/internal/servers/destination/v0/destinations.go b/internal/servers/destination/v0/destinations.go index 8e13b14531..8b97defa3a 100644 --- a/internal/servers/destination/v0/destinations.go +++ b/internal/servers/destination/v0/destinations.go @@ -10,7 +10,7 @@ import ( "github.com/apache/arrow/go/v13/arrow/memory" pbBase "github.com/cloudquery/plugin-pb-go/pb/base/v0" pb "github.com/cloudquery/plugin-pb-go/pb/destination/v0" - "github.com/cloudquery/plugin-pb-go/specs/v0" + "github.com/cloudquery/plugin-pb-go/specs" schemav2 "github.com/cloudquery/plugin-sdk/v2/schema" "github.com/cloudquery/plugin-sdk/v4/message" "github.com/cloudquery/plugin-sdk/v4/plugin" diff --git a/internal/servers/destination/v1/destinations.go b/internal/servers/destination/v1/destinations.go index 83f02462d9..99c5a653c5 100644 --- a/internal/servers/destination/v1/destinations.go +++ b/internal/servers/destination/v1/destinations.go @@ -11,7 +11,7 @@ import ( "github.com/apache/arrow/go/v13/arrow/ipc" "github.com/apache/arrow/go/v13/arrow/memory" pb "github.com/cloudquery/plugin-pb-go/pb/destination/v1" - "github.com/cloudquery/plugin-pb-go/specs/v0" + "github.com/cloudquery/plugin-pb-go/specs" "github.com/cloudquery/plugin-sdk/v4/message" "github.com/cloudquery/plugin-sdk/v4/plugin" "github.com/cloudquery/plugin-sdk/v4/schema" diff --git a/internal/servers/discovery/v1/discovery.go b/internal/servers/discovery/v1/discovery.go index 896e8a9cea..fedb964338 100644 --- a/internal/servers/discovery/v1/discovery.go +++ b/internal/servers/discovery/v1/discovery.go @@ -12,5 +12,9 @@ type Server struct { } func (s *Server) GetVersions(context.Context, *pb.GetVersions_Request) (*pb.GetVersions_Response, error) { - return &pb.GetVersions_Response{Versions: s.Versions}, nil + v := make([]int32, len(s.Versions)) + for i := range s.Versions { + v[i] = int32(s.Versions[i]) + } + return &pb.GetVersions_Response{Versions: v}, nil } diff --git a/internal/servers/plugin/v3/plugin.go b/internal/servers/plugin/v3/plugin.go index 788aaec743..e57d984a0e 100644 --- a/internal/servers/plugin/v3/plugin.go +++ b/internal/servers/plugin/v3/plugin.go @@ -82,7 +82,7 @@ func (s *Server) Sync(req *pb.Sync_Request, stream pb.Plugin_SyncServer) error { if s.NoSentry { opts = append(opts, managedplugin.WithNoSentry()) } - statePlugin, err := managedplugin.NewClient(ctx, managedplugin.Config{ + statePlugin, err := managedplugin.NewClient(ctx, managedplugin.PluginDestination, managedplugin.Config{ Path: req.StateBackend.Path, Registry: managedplugin.Registry(req.StateBackend.Registry), Version: req.StateBackend.Version, @@ -109,12 +109,17 @@ func (s *Server) Sync(req *pb.Sync_Request, stream pb.Plugin_SyncServer) error { for msg := range msgs { switch m := msg.(type) { case *message.MigrateTable: - m.Table.ToArrowSchema() + tableSchema := m.Table.ToArrowSchema() + schemaBytes, err := schema.ToBytes(tableSchema) + if err != nil { + return status.Errorf(codes.Internal, "failed to encode table schema: %v", err) + } pbMsg.Message = &pb.Sync_Response_MigrateTable{ MigrateTable: &pb.MessageMigrateTable{ - Table: nil, + Table: schemaBytes, }, } + case *message.Insert: recordBytes, err := schema.RecordToBytes(m.Record) if err != nil { diff --git a/scheduler/scheduler.go b/scheduler/scheduler.go index d1dc149804..d3f767e997 100644 --- a/scheduler/scheduler.go +++ b/scheduler/scheduler.go @@ -142,7 +142,6 @@ func (s *Scheduler) SyncAll(ctx context.Context, tables schema.Tables) (message. } func (s *Scheduler) Sync(ctx context.Context, tables schema.Tables, res chan<- message.Message) error { - if len(tables) == 0 { return nil } @@ -152,6 +151,13 @@ func (s *Scheduler) Sync(ctx context.Context, tables schema.Tables, res chan<- m } s.tables = tables + // send migrate messages first + for _, table := range tables { + res <- &message.MigrateTable{ + Table: table, + } + } + resources := make(chan *schema.Resource) go func() { defer close(resources) diff --git a/schema/arrow.go b/schema/arrow.go index 4baa2a4b86..d3d5e9639e 100644 --- a/schema/arrow.go +++ b/schema/arrow.go @@ -52,6 +52,15 @@ func (s Schemas) Encode() ([][]byte, error) { return ret, nil } +func ToBytes(schema *arrow.Schema) ([]byte, error) { + var buf bytes.Buffer + wr := ipc.NewWriter(&buf, ipc.WithSchema(schema)) + if err := wr.Close(); err != nil { + return nil, err + } + return buf.Bytes(), nil +} + func RecordToBytes(record arrow.Record) ([]byte, error) { var buf bytes.Buffer wr := ipc.NewWriter(&buf, ipc.WithSchema(record.Schema())) @@ -77,7 +86,7 @@ func NewRecordFromBytes(b []byte) (arrow.Record, error) { return nil, nil } -func NewSchemaFromBytes(b []byte) (*arrow.Schema, error) { +func NewFromBytes(b []byte) (*arrow.Schema, error) { rdr, err := ipc.NewReader(bytes.NewReader(b)) if err != nil { return nil, err @@ -89,7 +98,7 @@ func NewSchemasFromBytes(b [][]byte) (Schemas, error) { var err error ret := make([]*arrow.Schema, len(b)) for i, buf := range b { - ret[i], err = NewSchemaFromBytes(buf) + ret[i], err = NewFromBytes(buf) if err != nil { return nil, err } diff --git a/schema/arrow_test.go b/schema/arrow_test.go index bfe898bc19..184161f6dd 100644 --- a/schema/arrow_test.go +++ b/schema/arrow_test.go @@ -78,6 +78,28 @@ func TestRecordToBytesAndNewRecordFromBytes(t *testing.T) { } } +func TestSchemaToBytesAndNewSchemaFromBytes(t *testing.T) { + md := arrow.NewMetadata([]string{"key"}, []string{"value"}) + schema := arrow.NewSchema( + []arrow.Field{ + {Name: "id", Type: arrow.PrimitiveTypes.Int64}, + {Name: "name", Type: arrow.BinaryTypes.String}, + }, + &md, + ) + b, err := ToBytes(schema) + if err != nil { + t.Fatal(err) + } + decodedSchema, err := NewFromBytes(b) + if err != nil { + t.Fatal(err) + } + if !schema.Equal(decodedSchema) { + t.Fatalf("schema differs from expected after NewSchemaFromBytes. \nBefore: %v,\nAfter: %v", schema, decodedSchema) + } +} + func RecordDiff(l arrow.Record, r arrow.Record) string { var sb strings.Builder if l.NumCols() != r.NumCols() { diff --git a/schema/table.go b/schema/table.go index 76a7384650..b36c02b6a7 100644 --- a/schema/table.go +++ b/schema/table.go @@ -109,7 +109,7 @@ func NewTablesFromArrowSchemas(schemas []*arrow.Schema) (Tables, error) { } func NewTableFromBytes(b []byte) (*Table, error) { - sc, err := NewSchemaFromBytes(b) + sc, err := NewFromBytes(b) if err != nil { return nil, err } diff --git a/serve/destination_v0_test.go b/serve/destination_v0_test.go index 150cbc29fb..96b7565c82 100644 --- a/serve/destination_v0_test.go +++ b/serve/destination_v0_test.go @@ -11,7 +11,7 @@ import ( "github.com/apache/arrow/go/v13/arrow/memory" pbBase "github.com/cloudquery/plugin-pb-go/pb/base/v0" pb "github.com/cloudquery/plugin-pb-go/pb/destination/v0" - "github.com/cloudquery/plugin-pb-go/specs/v0" + "github.com/cloudquery/plugin-pb-go/specs" schemav2 "github.com/cloudquery/plugin-sdk/v2/schema" "github.com/cloudquery/plugin-sdk/v2/testdata" "github.com/cloudquery/plugin-sdk/v4/internal/deprecated" diff --git a/serve/destination_v1_test.go b/serve/destination_v1_test.go index 11e1ab738c..cafb1f7cdf 100644 --- a/serve/destination_v1_test.go +++ b/serve/destination_v1_test.go @@ -11,7 +11,7 @@ import ( "github.com/apache/arrow/go/v13/arrow/array" "github.com/apache/arrow/go/v13/arrow/ipc" pb "github.com/cloudquery/plugin-pb-go/pb/destination/v1" - "github.com/cloudquery/plugin-pb-go/specs/v0" + "github.com/cloudquery/plugin-pb-go/specs" "github.com/cloudquery/plugin-sdk/v4/internal/memdb" "github.com/cloudquery/plugin-sdk/v4/message" "github.com/cloudquery/plugin-sdk/v4/plugin" From 1969467449d7b7107b00a57b884ae95a5b79bc40 Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Thu, 22 Jun 2023 11:40:44 +0300 Subject: [PATCH 046/125] commit fixes --- go.mod | 6 +----- go.sum | 6 ++++++ 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/go.mod b/go.mod index c83cabbd5a..d7569b6f06 100644 --- a/go.mod +++ b/go.mod @@ -18,18 +18,13 @@ require ( github.com/stretchr/testify v1.8.4 github.com/thoas/go-funk v0.9.3 golang.org/x/exp v0.0.0-20230522175609-2e198f4a06a1 - golang.org/x/net v0.9.0 golang.org/x/sync v0.1.0 golang.org/x/text v0.9.0 google.golang.org/grpc v1.55.0 google.golang.org/protobuf v1.30.0 ) -<<<<<<< HEAD -replace github.com/apache/arrow/go/v13 => github.com/cloudquery/arrow/go/v13 v13.0.0-20230623001532-8366a2241e66 -======= replace github.com/apache/arrow/go/v13 => github.com/cloudquery/arrow/go/v13 v13.0.0-20230610001216-0f7bd3beda2c ->>>>>>> 7e5547e (more wip) replace github.com/cloudquery/plugin-pb-go => ../plugin-pb-go replace github.com/cloudquery/plugin-pb-go => ../plugin-pb-go @@ -59,6 +54,7 @@ require ( github.com/spf13/pflag v1.0.5 // indirect github.com/zeebo/xxh3 v1.0.2 // indirect golang.org/x/mod v0.8.0 // indirect + golang.org/x/net v0.9.0 // indirect golang.org/x/sys v0.7.0 // indirect golang.org/x/term v0.7.0 // indirect golang.org/x/tools v0.6.0 // indirect diff --git a/go.sum b/go.sum index 6d7acabc3e..7eb5068cd3 100644 --- a/go.sum +++ b/go.sum @@ -214,6 +214,12 @@ github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81P github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +<<<<<<< HEAD +======= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +>>>>>>> c0b7ea6 (commit fixes) github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/thoas/go-funk v0.9.3 h1:7+nAEx3kn5ZJcnDm2Bh23N2yOtweO14bi//dvRtgLpw= From 07d79cd034969723683cc488b9b82cd50d2111f1 Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Thu, 22 Jun 2023 22:54:07 +0300 Subject: [PATCH 047/125] migrate flattend tables --- scheduler/scheduler.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scheduler/scheduler.go b/scheduler/scheduler.go index d3f767e997..be5a7a1e71 100644 --- a/scheduler/scheduler.go +++ b/scheduler/scheduler.go @@ -152,7 +152,7 @@ func (s *Scheduler) Sync(ctx context.Context, tables schema.Tables, res chan<- m s.tables = tables // send migrate messages first - for _, table := range tables { + for _, table := range tables.FlattenTables() { res <- &message.MigrateTable{ Table: table, } From dbd714230be5322bf4650cf8229bdff934bff8e6 Mon Sep 17 00:00:00 2001 From: Herman Schaaf Date: Fri, 23 Jun 2023 10:59:13 +0100 Subject: [PATCH 048/125] Remove upserts --- internal/servers/plugin/v3/plugin.go | 2 -- internal/servers/plugin/v3/state.go | 1 - 2 files changed, 3 deletions(-) diff --git a/internal/servers/plugin/v3/plugin.go b/internal/servers/plugin/v3/plugin.go index e57d984a0e..bc902f2272 100644 --- a/internal/servers/plugin/v3/plugin.go +++ b/internal/servers/plugin/v3/plugin.go @@ -128,7 +128,6 @@ func (s *Server) Sync(req *pb.Sync_Request, stream pb.Plugin_SyncServer) error { pbMsg.Message = &pb.Sync_Response_Insert{ Insert: &pb.MessageInsert{ Record: recordBytes, - Upsert: m.Upsert, }, } case *message.DeleteStale: @@ -217,7 +216,6 @@ func (s *Server) Write(msg pb.Plugin_WriteServer) error { } pluginMessage = &message.Insert{ Record: record, - Upsert: pbMsg.Insert.Upsert, } case *pb.Write_Request_Delete: table, err := schema.NewTableFromBytes(pbMsg.Delete.Table) diff --git a/internal/servers/plugin/v3/state.go b/internal/servers/plugin/v3/state.go index f7a9015433..146e646248 100644 --- a/internal/servers/plugin/v3/state.go +++ b/internal/servers/plugin/v3/state.go @@ -154,7 +154,6 @@ func (c *ClientV3) flush(ctx context.Context) error { Message: &pbPlugin.Write_Request_Insert{ Insert: &pbPlugin.MessageInsert{ Record: buf.Bytes(), - Upsert: true, }, }, }); err != nil { From 701338b366bb6b68288e8e7a53ab709a93c765be Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Fri, 23 Jun 2023 13:17:20 +0300 Subject: [PATCH 049/125] fix tests --- go.mod | 7 ++----- go.sum | 2 ++ scheduler/scheduler_test.go | 10 +++------- 3 files changed, 7 insertions(+), 12 deletions(-) diff --git a/go.mod b/go.mod index d7569b6f06..f6d8df335b 100644 --- a/go.mod +++ b/go.mod @@ -3,9 +3,9 @@ module github.com/cloudquery/plugin-sdk/v4 go 1.19 require ( - github.com/apache/arrow/go/v13 v13.0.0-20230601214540-018e7d3f9c4b + github.com/apache/arrow/go/v13 v13.0.0-20230622042343-ec413b7763fe github.com/bradleyjkemp/cupaloy/v2 v2.8.0 - github.com/cloudquery/plugin-pb-go v1.1.0 + github.com/cloudquery/plugin-pb-go v1.2.1 github.com/cloudquery/plugin-sdk/v2 v2.7.0 github.com/getsentry/sentry-go v0.20.0 github.com/goccy/go-json v0.10.0 @@ -26,9 +26,6 @@ require ( replace github.com/apache/arrow/go/v13 => github.com/cloudquery/arrow/go/v13 v13.0.0-20230610001216-0f7bd3beda2c -replace github.com/cloudquery/plugin-pb-go => ../plugin-pb-go -replace github.com/cloudquery/plugin-pb-go => ../plugin-pb-go - require ( github.com/andybalholm/brotli v1.0.5 // indirect github.com/apache/thrift v0.16.0 // indirect diff --git a/go.sum b/go.sum index 7eb5068cd3..f3e63c76e2 100644 --- a/go.sum +++ b/go.sum @@ -49,6 +49,8 @@ github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMn github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/cloudquery/arrow/go/v13 v13.0.0-20230610001216-0f7bd3beda2c h1:nQSB4v0QxCW5XDLvVBcaNrsJ+J/esMBoFYjymllxM1E= github.com/cloudquery/arrow/go/v13 v13.0.0-20230610001216-0f7bd3beda2c/go.mod h1:W69eByFNO0ZR30q1/7Sr9d83zcVZmF2MiP3fFYAWJOc= +github.com/cloudquery/plugin-pb-go v1.2.1 h1:Ewsg70dkB/f+hzeqKNhEslX0u+1zG01eb4kQ8V9d2dk= +github.com/cloudquery/plugin-pb-go v1.2.1/go.mod h1:NbWAtT2BzJQ9+XUWwh3IKBg3MOeV9ZEpHoHNAQ/YDV8= github.com/cloudquery/plugin-sdk/v2 v2.7.0 h1:hRXsdEiaOxJtsn/wZMFQC9/jPfU1MeMK3KF+gPGqm7U= github.com/cloudquery/plugin-sdk/v2 v2.7.0/go.mod h1:pAX6ojIW99b/Vg4CkhnsGkRIzNaVEceYMR+Bdit73ug= github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= diff --git a/scheduler/scheduler_test.go b/scheduler/scheduler_test.go index ee6e55b801..c8a19fb856 100644 --- a/scheduler/scheduler_test.go +++ b/scheduler/scheduler_test.go @@ -238,12 +238,6 @@ func testSyncTable(t *testing.T, tc syncTestCase, strategy Strategy, determinist var i int for msg := range msgs { - if tc.data == nil { - t.Fatalf("Unexpected message %v", msg) - } - if i >= len(tc.data) { - t.Fatalf("expected %d resources. got %d", len(tc.data), i) - } switch v := msg.(type) { case *message.Insert: record := v.Record @@ -252,8 +246,10 @@ func testSyncTable(t *testing.T, tc syncTestCase, strategy Strategy, determinist t.Fatalf("expected at i=%d: %v. got %v", i, tc.data[i], record) } i++ + case *message.MigrateTable: + // ignore default: - t.Fatalf("expected insert message. got %v", msg) + t.Fatalf("expected insert message. got %T", msg) } } if len(tc.data) != i { From 75af24931824e0cd8c1ed25d31e1f567934482ae Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Fri, 23 Jun 2023 13:55:55 +0300 Subject: [PATCH 050/125] rebase complete --- go.mod | 1 + go.sum | 14 +++++--------- plugin/diff.go | 2 +- plugin/nulls.go | 2 +- plugin/testing_sync.go | 33 --------------------------------- scalar/scalar.go | 2 ++ scalar/string.go | 3 +++ scheduler/scheduler_test.go | 18 +++++++++--------- schema/testdata.go | 1 - 9 files changed, 22 insertions(+), 54 deletions(-) delete mode 100644 plugin/testing_sync.go diff --git a/go.mod b/go.mod index f6d8df335b..4f0776f201 100644 --- a/go.mod +++ b/go.mod @@ -36,6 +36,7 @@ require ( github.com/golang/snappy v0.0.4 // indirect github.com/google/flatbuffers v23.1.21+incompatible // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect + github.com/klauspost/asmfmt v1.3.2 // indirect github.com/klauspost/compress v1.16.0 // indirect github.com/klauspost/cpuid/v2 v2.2.3 // indirect github.com/mattn/go-colorable v0.1.13 // indirect diff --git a/go.sum b/go.sum index f3e63c76e2..37d20fb479 100644 --- a/go.sum +++ b/go.sum @@ -89,6 +89,7 @@ github.com/golang/mock v1.4.0/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt github.com/golang/mock v1.4.1/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw= github.com/golang/mock v1.4.3/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw= github.com/golang/mock v1.4.4/go.mod h1:l3mdAwkq5BuhzHwde/uurv3sEJeZMXNpwsxVWU71h+4= +github.com/golang/mock v1.5.0/go.mod h1:CWnOUgYIOo4TcNZ0wHX3YZCqsaM1I1Jvs6v3mP3KVu8= github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= @@ -106,6 +107,8 @@ github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaS github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg= github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= +github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM= +github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/flatbuffers v23.1.21+incompatible h1:bUqzx/MXCDxuS0hRJL2EfjyZL3uQrPbMocUa8zGqsTA= @@ -151,6 +154,8 @@ github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/X github.com/k0kubun/go-ansi v0.0.0-20180517002512-3bf9e2903213/go.mod h1:vNUNkEQ1e29fT/6vq2aBdFsgNPmy8qMdSay1npru+Sw= github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/klauspost/asmfmt v1.3.2 h1:4Ri7ox3EwapiOjCki+hw14RyKk201CN4rzyCJRFLpK4= +github.com/klauspost/asmfmt v1.3.2/go.mod h1:AG8TuvYojzulgDAMCnYn50l/5QV3Bs/tp6j0HLHbNSE= github.com/klauspost/compress v1.16.0 h1:iULayQNOReoYUe+1qtKOqw9CwJv3aNQu8ivo7lw1HU4= github.com/klauspost/compress v1.16.0/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE= github.com/klauspost/cpuid/v2 v2.2.3 h1:sxCkb+qR91z4vsqw4vGGZlDgPz3G7gjaLyK3V8y70BU= @@ -168,8 +173,6 @@ github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/ github.com/mattn/go-isatty v0.0.17/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= github.com/mattn/go-isatty v0.0.18 h1:DOKFKCQ7FNG2L1rbrmstDN4QVRdS89Nkh85u68Uwp98= github.com/mattn/go-isatty v0.0.18/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= -<<<<<<< HEAD -======= github.com/mattn/go-runewidth v0.0.14 h1:+xnbZSEeDbOIg5/mE6JF0w6n9duR1l3/WmbinWVwUuU= github.com/mattn/go-runewidth v0.0.14/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 h1:AMFGa4R4MiIpspGNG7Z948v4n35fFGB3RR3G/ry4FWs= @@ -178,7 +181,6 @@ github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3 h1:+n/aFZefKZp7spd8D github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3/go.mod h1:RagcQ7I8IeTMnF8JTXieKnO4Z6JCsikNEzj0DwauVzE= github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db h1:62I3jR2EmQ4l5rM/4FEfDWcRD+abF5XlKShorW5LRoQ= github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db/go.mod h1:l0dey0ia/Uv7NcFFVbCLtqEBQbrT4OCwCSKTEv6enCw= ->>>>>>> 446b805 (wip) github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o= github.com/pierrec/lz4/v4 v4.1.17 h1:kV4Ip+/hUBC+8T6+2EgburRtkE9ef4nbY3f4dFhGjMc= github.com/pierrec/lz4/v4 v4.1.17/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= @@ -206,22 +208,16 @@ github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -<<<<<<< HEAD -======= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= ->>>>>>> 446b805 (wip) github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -<<<<<<< HEAD -======= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= ->>>>>>> c0b7ea6 (commit fixes) github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/thoas/go-funk v0.9.3 h1:7+nAEx3kn5ZJcnDm2Bh23N2yOtweO14bi//dvRtgLpw= diff --git a/plugin/diff.go b/plugin/diff.go index dc3c555ce0..343de8d19b 100644 --- a/plugin/diff.go +++ b/plugin/diff.go @@ -1,4 +1,4 @@ -package destination +package plugin import ( "fmt" diff --git a/plugin/nulls.go b/plugin/nulls.go index 02d80a5f1c..e60a88a24f 100644 --- a/plugin/nulls.go +++ b/plugin/nulls.go @@ -69,4 +69,4 @@ func (f AllowNullFunc) replaceNullsByEmpty(records []arrow.Record) { } records[i] = array.NewRecord(records[i].Schema(), cols, records[i].NumRows()) } -} \ No newline at end of file +} diff --git a/plugin/testing_sync.go b/plugin/testing_sync.go deleted file mode 100644 index edf045f055..0000000000 --- a/plugin/testing_sync.go +++ /dev/null @@ -1,33 +0,0 @@ -package plugin - -import ( - "fmt" - "strings" - - "github.com/apache/arrow/go/v13/arrow" - "github.com/apache/arrow/go/v13/arrow/array" -) - -func RecordDiff(l arrow.Record, r arrow.Record) string { - var sb strings.Builder - if l.NumCols() != r.NumCols() { - return fmt.Sprintf("different number of columns: %d vs %d", l.NumCols(), r.NumCols()) - } - if l.NumRows() != r.NumRows() { - return fmt.Sprintf("different number of rows: %d vs %d", l.NumRows(), r.NumRows()) - } - for i := 0; i < int(l.NumCols()); i++ { - edits, err := array.Diff(l.Column(i), r.Column(i)) - if err != nil { - panic(fmt.Sprintf("left: %v, right: %v, error: %v", l.Column(i).DataType(), r.Column(i).DataType(), err)) - } - diff := edits.UnifiedDiff(l.Column(i), r.Column(i)) - if diff != "" { - sb.WriteString(l.Schema().Field(i).Name) - sb.WriteString(": ") - sb.WriteString(diff) - sb.WriteString("\n") - } - } - return sb.String() -} diff --git a/scalar/scalar.go b/scalar/scalar.go index d80c1a2e5e..7236cd7109 100644 --- a/scalar/scalar.go +++ b/scalar/scalar.go @@ -5,8 +5,10 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/array" + "github.com/apache/arrow/go/v13/arrow/float16" "github.com/apache/arrow/go/v13/arrow/memory" "github.com/cloudquery/plugin-sdk/v4/types" + "golang.org/x/exp/maps" ) // Scalar represents a single value of a specific DataType as opposed to diff --git a/scalar/string.go b/scalar/string.go index 0d191d844e..7997aded97 100644 --- a/scalar/string.go +++ b/scalar/string.go @@ -4,8 +4,11 @@ import ( "fmt" "github.com/apache/arrow/go/v13/arrow" + "github.com/apache/arrow/go/v13/arrow/array" ) +const nullValueStr = array.NullValueStr + type String struct { Valid bool Value string diff --git a/scheduler/scheduler_test.go b/scheduler/scheduler_test.go index c8a19fb856..1fe5bc57ea 100644 --- a/scheduler/scheduler_test.go +++ b/scheduler/scheduler_test.go @@ -139,7 +139,7 @@ var syncTestCases = []syncTestCase{ table: testTableSuccess(), data: []scalar.Vector{ { - &scalar.Int64{Value: 3, Valid: true}, + &scalar.Int{Value: 3, Valid: true}, }, }, }, @@ -156,10 +156,10 @@ var syncTestCases = []syncTestCase{ table: testTableRelationSuccess(), data: []scalar.Vector{ { - &scalar.Int64{Value: 3, Valid: true}, + &scalar.Int{Value: 3, Valid: true}, }, { - &scalar.Int64{Value: 3, Valid: true}, + &scalar.Int{Value: 3, Valid: true}, }, }, }, @@ -167,7 +167,7 @@ var syncTestCases = []syncTestCase{ table: testTableSuccess(), data: []scalar.Vector{ { - &scalar.Int64{Value: 3, Valid: true}, + &scalar.Int{Value: 3, Valid: true}, }, }, deterministicCQID: true, @@ -176,8 +176,8 @@ var syncTestCases = []syncTestCase{ table: testTableColumnResolverPanic(), data: []scalar.Vector{ { - &scalar.Int64{Value: 3, Valid: true}, - &scalar.Int64{}, + &scalar.Int{Value: 3, Valid: true}, + &scalar.Int{}, }, }, // deterministicCQID: true, @@ -186,10 +186,10 @@ var syncTestCases = []syncTestCase{ table: testTableRelationSuccess(), data: []scalar.Vector{ { - &scalar.Int64{Value: 3, Valid: true}, + &scalar.Int{Value: 3, Valid: true}, }, { - &scalar.Int64{Value: 3, Valid: true}, + &scalar.Int{Value: 3, Valid: true}, }, }, // deterministicCQID: true, @@ -198,7 +198,7 @@ var syncTestCases = []syncTestCase{ table: testTableSuccessWithPK(), data: []scalar.Vector{ { - &scalar.Int64{Value: 3, Valid: true}, + &scalar.Int{Value: 3, Valid: true}, }, }, // deterministicCQID: true, diff --git a/schema/testdata.go b/schema/testdata.go index c592ddc40a..af79a95f5e 100644 --- a/schema/testdata.go +++ b/schema/testdata.go @@ -21,7 +21,6 @@ import ( // TestSourceOptions controls which types are included by TestSourceColumns. type TestSourceOptions struct { SkipDates bool - SkipDecimals bool SkipDurations bool SkipIntervals bool SkipLargeTypes bool // e.g. large binary, large string From a245984b372583e76d0601219413b210bf38a90c Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Sun, 28 May 2023 20:57:01 +0300 Subject: [PATCH 051/125] move to one plugin wip --- go.mod | 2 + go.sum | 6 +- internal/servers/plugin/v0/plugin.go | 295 +++++++++++ plugin/benchmark_test.go | 429 ++++++++++++++++ plugin/docs.go | 242 +++++++++ plugin/docs_test.go | 164 ++++++ plugin/metrics.go | 125 +++++ plugin/metrics_test.go | 37 ++ plugin/options.go | 46 ++ plugin/plugin.go | 326 ++++++++++++ plugin/plugin_round_robin_test.go | 148 ++++++ plugin/plugin_test.go | 470 ++++++++++++++++++ plugin/scheduler.go | 163 ++++++ plugin/scheduler_dfs.go | 230 +++++++++ plugin/scheduler_round_robin.go | 104 ++++ plugin/scheduler_round_robin_test.go | 65 +++ plugin/templates/all_tables.md.go.tpl | 5 + plugin/templates/all_tables_entry.md.go.tpl | 5 + plugin/templates/table.md.go.tpl | 44 ++ .../TestGeneratePluginDocs-JSON-__tables.json | 214 ++++++++ .../TestGeneratePluginDocs-Markdown-README.md | 10 + ...tePluginDocs-Markdown-incremental_table.md | 20 + ...Docs-Markdown-relation_relation_table_a.md | 21 + ...Docs-Markdown-relation_relation_table_b.md | 21 + ...eratePluginDocs-Markdown-relation_table.md | 25 + ...tGeneratePluginDocs-Markdown-test_table.md | 29 ++ plugin/testing.go | 141 ++++++ plugin/validate.go | 27 + serve/plugin.go | 235 +++++++++ serve/plugin_test.go | 238 +++++++++ 30 files changed, 3883 insertions(+), 4 deletions(-) create mode 100644 internal/servers/plugin/v0/plugin.go create mode 100644 plugin/benchmark_test.go create mode 100644 plugin/docs.go create mode 100644 plugin/docs_test.go create mode 100644 plugin/metrics.go create mode 100644 plugin/metrics_test.go create mode 100644 plugin/options.go create mode 100644 plugin/plugin.go create mode 100644 plugin/plugin_round_robin_test.go create mode 100644 plugin/plugin_test.go create mode 100644 plugin/scheduler.go create mode 100644 plugin/scheduler_dfs.go create mode 100644 plugin/scheduler_round_robin.go create mode 100644 plugin/scheduler_round_robin_test.go create mode 100644 plugin/templates/all_tables.md.go.tpl create mode 100644 plugin/templates/all_tables_entry.md.go.tpl create mode 100644 plugin/templates/table.md.go.tpl create mode 100644 plugin/testdata/TestGeneratePluginDocs-JSON-__tables.json create mode 100644 plugin/testdata/TestGeneratePluginDocs-Markdown-README.md create mode 100644 plugin/testdata/TestGeneratePluginDocs-Markdown-incremental_table.md create mode 100644 plugin/testdata/TestGeneratePluginDocs-Markdown-relation_relation_table_a.md create mode 100644 plugin/testdata/TestGeneratePluginDocs-Markdown-relation_relation_table_b.md create mode 100644 plugin/testdata/TestGeneratePluginDocs-Markdown-relation_table.md create mode 100644 plugin/testdata/TestGeneratePluginDocs-Markdown-test_table.md create mode 100644 plugin/testing.go create mode 100644 plugin/validate.go create mode 100644 serve/plugin.go create mode 100644 serve/plugin_test.go diff --git a/go.mod b/go.mod index d33f94436f..fab4f9e1b3 100644 --- a/go.mod +++ b/go.mod @@ -28,6 +28,8 @@ require ( replace github.com/apache/arrow/go/v13 => github.com/cloudquery/arrow/go/v13 v13.0.0-20230623001532-8366a2241e66 +replace github.com/cloudquery/plugin-pb-go => ../plugin-pb-go + require ( github.com/davecgh/go-spew v1.1.1 // indirect github.com/ghodss/yaml v1.0.0 // indirect diff --git a/go.sum b/go.sum index 0b899cd71c..8f7dfaf7d1 100644 --- a/go.sum +++ b/go.sum @@ -40,10 +40,8 @@ github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWR github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= -github.com/cloudquery/arrow/go/v13 v13.0.0-20230623001532-8366a2241e66 h1:8eQrRKCk6OwCiIW43+Y10p2nkTdTATu5kqXEA7iBlg8= -github.com/cloudquery/arrow/go/v13 v13.0.0-20230623001532-8366a2241e66/go.mod h1:W69eByFNO0ZR30q1/7Sr9d83zcVZmF2MiP3fFYAWJOc= -github.com/cloudquery/plugin-pb-go v1.2.0 h1:p8Q3nitSC9zMI+YJ4/90LM4e3i2BahHOEK4Dhkl64vE= -github.com/cloudquery/plugin-pb-go v1.2.0/go.mod h1:NbWAtT2BzJQ9+XUWwh3IKBg3MOeV9ZEpHoHNAQ/YDV8= +github.com/cloudquery/arrow/go/v13 v13.0.0-20230509053643-898a79b1d3c8 h1:CmgLSEGQNLHpUQ5cU4L4aF7cuJZRnc1toIIWqC1gmPg= +github.com/cloudquery/arrow/go/v13 v13.0.0-20230509053643-898a79b1d3c8/go.mod h1:/XatdE3kDIBqZKhZ7OBUHwP2jaASDFZHqF4puOWM8po= github.com/cloudquery/plugin-sdk/v2 v2.7.0 h1:hRXsdEiaOxJtsn/wZMFQC9/jPfU1MeMK3KF+gPGqm7U= github.com/cloudquery/plugin-sdk/v2 v2.7.0/go.mod h1:pAX6ojIW99b/Vg4CkhnsGkRIzNaVEceYMR+Bdit73ug= github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= diff --git a/internal/servers/plugin/v0/plugin.go b/internal/servers/plugin/v0/plugin.go new file mode 100644 index 0000000000..d00b16059c --- /dev/null +++ b/internal/servers/plugin/v0/plugin.go @@ -0,0 +1,295 @@ +package plugin + +import ( + "bytes" + "context" + "encoding/json" + "errors" + "fmt" + "io" + "io/ioutil" + "os" + "path/filepath" + + "github.com/apache/arrow/go/v13/arrow" + "github.com/apache/arrow/go/v13/arrow/array" + "github.com/apache/arrow/go/v13/arrow/ipc" + "github.com/apache/arrow/go/v13/arrow/memory" + pb "github.com/cloudquery/plugin-pb-go/pb/plugin/v0" + "github.com/cloudquery/plugin-sdk/v3/plugin" + "github.com/cloudquery/plugin-sdk/v3/plugins/source" + "github.com/cloudquery/plugin-sdk/v3/scalar" + "github.com/cloudquery/plugin-sdk/v3/schema" + "github.com/getsentry/sentry-go" + "github.com/rs/zerolog" + "golang.org/x/sync/errgroup" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" + "google.golang.org/protobuf/proto" +) + +const MaxMsgSize = 100 * 1024 * 1024 // 100 MiB + +type Server struct { + pb.UnimplementedPluginServer + Plugin *plugin.Plugin + Logger zerolog.Logger + spec pb.Spec +} + +func (s *Server) GetStaticTables(context.Context, *pb.GetStaticTables_Request) (*pb.GetStaticTables_Response, error) { + tables := s.Plugin.StaticTables().ToArrowSchemas() + encoded, err := tables.Encode() + if err != nil { + return nil, fmt.Errorf("failed to encode tables: %w", err) + } + return &pb.GetStaticTables_Response{ + Tables: encoded, + }, nil +} + +func (s *Server) GetDynamicTables(context.Context, *pb.GetDynamicTables_Request) (*pb.GetDynamicTables_Response, error) { + // TODO: Fix this + tables := s.Plugin.StaticTables().ToArrowSchemas() + encoded, err := tables.Encode() + if err != nil { + return nil, fmt.Errorf("failed to encode tables: %w", err) + } + return &pb.GetDynamicTables_Response{ + Tables: encoded, + }, nil +} + +func (s *Server) GetName(context.Context, *pb.GetName_Request) (*pb.GetName_Response, error) { + return &pb.GetName_Response{ + Name: s.Plugin.Name(), + }, nil +} + +func (s *Server) GetVersion(context.Context, *pb.GetVersion_Request) (*pb.GetVersion_Response, error) { + return &pb.GetVersion_Response{ + Version: s.Plugin.Version(), + }, nil +} + +func (s *Server) Init(ctx context.Context, req *pb.Init_Request) (*pb.Init_Response, error) { + if err := s.Plugin.Init(ctx, *req.Spec); err != nil { + return nil, status.Errorf(codes.Internal, "failed to init plugin: %v", err) + } + s.spec = *req.Spec + return &pb.Init_Response{}, nil +} + +func (s *Server) Sync(req *pb.Sync_Request, stream pb.Plugin_SyncServer) error { + resources := make(chan *schema.Resource) + var syncErr error + ctx := stream.Context() + + go func() { + defer close(resources) + err := s.Plugin.Sync(ctx, req.SyncTime.AsTime(), *req.SyncSpec, resources) + if err != nil { + syncErr = fmt.Errorf("failed to sync resources: %w", err) + } + }() + + for resource := range resources { + vector := resource.GetValues() + bldr := array.NewRecordBuilder(memory.DefaultAllocator, resource.Table.ToArrowSchema()) + scalar.AppendToRecordBuilder(bldr, vector) + rec := bldr.NewRecord() + + var buf bytes.Buffer + w := ipc.NewWriter(&buf, ipc.WithSchema(rec.Schema())) + if err := w.Write(rec); err != nil { + return status.Errorf(codes.Internal, "failed to write record: %v", err) + } + if err := w.Close(); err != nil { + return status.Errorf(codes.Internal, "failed to close writer: %v", err) + } + + msg := &pb.Sync_Response{ + Resource: buf.Bytes(), + } + err := checkMessageSize(msg, resource) + if err != nil { + s.Logger.Warn().Str("table", resource.Table.Name). + Int("bytes", len(msg.String())). + Msg("Row exceeding max bytes ignored") + continue + } + if err := stream.Send(msg); err != nil { + return status.Errorf(codes.Internal, "failed to send resource: %v", err) + } + } + + return syncErr +} + +func (s *Server) GetMetrics(context.Context, *pb.GetMetrics_Request) (*pb.GetMetrics_Response, error) { + // Aggregate metrics before sending to keep response size small. + // Temporary fix for https://github.com/cloudquery/cloudquery/issues/3962 + m := s.Plugin.Metrics() + agg := &source.TableClientMetrics{} + for _, table := range m.TableClient { + for _, tableClient := range table { + agg.Resources += tableClient.Resources + agg.Errors += tableClient.Errors + agg.Panics += tableClient.Panics + } + } + b, err := json.Marshal(&source.Metrics{ + TableClient: map[string]map[string]*source.TableClientMetrics{"": {"": agg}}, + }) + if err != nil { + return nil, fmt.Errorf("failed to marshal source metrics: %w", err) + } + return &pb.GetMetrics_Response{ + Metrics: b, + }, nil +} + +func (s *Server) Migrate(ctx context.Context, req *pb.Migrate_Request) (*pb.Migrate_Response, error) { + schemas, err := schema.NewSchemasFromBytes(req.Tables) + if err != nil { + return nil, status.Errorf(codes.InvalidArgument, "failed to create schemas: %v", err) + } + tables, err := schema.NewTablesFromArrowSchemas(schemas) + if err != nil { + return nil, status.Errorf(codes.InvalidArgument, "failed to create tables: %v", err) + } + s.setPKsForTables(tables) + return &pb.Migrate_Response{}, s.Plugin.Migrate(ctx, tables) +} + +func (s *Server) Write(msg pb.Plugin_WriteServer) error { + resources := make(chan arrow.Record) + + r, err := msg.Recv() + if err != nil { + if err == io.EOF { + return msg.SendAndClose(&pb.Write_Response{}) + } + return status.Errorf(codes.Internal, "failed to receive msg: %v", err) + } + + schemas, err := schema.NewSchemasFromBytes(r.Tables) + if err != nil { + return status.Errorf(codes.InvalidArgument, "failed to create schemas: %v", err) + } + tables, err := schema.NewTablesFromArrowSchemas(schemas) + if err != nil { + return status.Errorf(codes.InvalidArgument, "failed to create tables: %v", err) + } + s.setPKsForTables(tables) + sourceSpec := *r.SourceSpec + syncTime := r.Timestamp.AsTime() + eg, ctx := errgroup.WithContext(msg.Context()) + eg.Go(func() error { + return s.Plugin.Write(ctx, sourceSpec, tables, syncTime, resources) + }) + + for { + r, err := msg.Recv() + if err == io.EOF { + close(resources) + if err := eg.Wait(); err != nil { + return status.Errorf(codes.Internal, "write failed: %v", err) + } + return msg.SendAndClose(&pb.Write_Response{}) + } + if err != nil { + close(resources) + if wgErr := eg.Wait(); wgErr != nil { + return status.Errorf(codes.Internal, "failed to receive msg: %v and write failed: %v", err, wgErr) + } + return status.Errorf(codes.Internal, "failed to receive msg: %v", err) + } + rdr, err := ipc.NewReader(bytes.NewReader(r.Resource)) + if err != nil { + close(resources) + if wgErr := eg.Wait(); wgErr != nil { + return status.Errorf(codes.InvalidArgument, "failed to create reader: %v and write failed: %v", err, wgErr) + } + return status.Errorf(codes.InvalidArgument, "failed to create reader: %v", err) + } + for rdr.Next() { + rec := rdr.Record() + rec.Retain() + select { + case resources <- rec: + case <-ctx.Done(): + close(resources) + if err := eg.Wait(); err != nil { + return status.Errorf(codes.Internal, "Context done: %v and failed to wait for plugin: %v", ctx.Err(), err) + } + return status.Errorf(codes.Internal, "Context done: %v", ctx.Err()) + } + } + if err := rdr.Err(); err != nil { + return status.Errorf(codes.InvalidArgument, "failed to read resource: %v", err) + } + } +} + +func (s *Server) GenDocs(req *pb.GenDocs_Request, srv pb.Plugin_GenDocsServer) error { + tmpDir := os.TempDir() + defer os.RemoveAll(tmpDir) + err := s.Plugin.GeneratePluginDocs(s.Plugin.StaticTables(), tmpDir, req.Format) + if err != nil { + return fmt.Errorf("failed to generate docs: %w", err) + } + + // list files in tmpDir + files, err := ioutil.ReadDir(tmpDir) + if err != nil { + return fmt.Errorf("failed to read tmp dir: %w", err) + } + for _, f := range files { + if f.IsDir() { + continue + } + content, err := os.ReadFile(filepath.Join(tmpDir, f.Name())) + if err != nil { + return fmt.Errorf("failed to read file: %w", err) + } + if err := srv.Send(&pb.GenDocs_Response{ + Filename: f.Name(), + Content: content, + }); err != nil { + return fmt.Errorf("failed to send file: %w", err) + } + } + return nil +} + +func checkMessageSize(msg proto.Message, resource *schema.Resource) error { + size := proto.Size(msg) + // log error to Sentry if row exceeds half of the max size + if size > MaxMsgSize/2 { + sentry.WithScope(func(scope *sentry.Scope) { + scope.SetTag("table", resource.Table.Name) + scope.SetExtra("bytes", size) + sentry.CurrentHub().CaptureMessage("Large message detected") + }) + } + if size > MaxMsgSize { + return errors.New("message exceeds max size") + } + return nil +} + +func (s *Server) setPKsForTables(tables schema.Tables) { + if s.spec.WriteSpec.PkMode == pb.WriteSpec_CQ_ID_ONLY { + setCQIDAsPrimaryKeysForTables(tables) + } +} + +func setCQIDAsPrimaryKeysForTables(tables schema.Tables) { + for _, table := range tables { + for i, col := range table.Columns { + table.Columns[i].PrimaryKey = col.Name == schema.CqIDColumn.Name + } + setCQIDAsPrimaryKeysForTables(table.Relations) + } +} \ No newline at end of file diff --git a/plugin/benchmark_test.go b/plugin/benchmark_test.go new file mode 100644 index 0000000000..36a86cd3cd --- /dev/null +++ b/plugin/benchmark_test.go @@ -0,0 +1,429 @@ +package plugin + +import ( + "context" + "fmt" + "math/rand" + "sync" + "sync/atomic" + "testing" + "time" + + "github.com/apache/arrow/go/v13/arrow" + "github.com/cloudquery/plugin-pb-go/specs" + "github.com/cloudquery/plugin-sdk/v3/schema" + "github.com/rs/zerolog" + "golang.org/x/sync/errgroup" +) + +type BenchmarkScenario struct { + Client Client + Scheduler specs.Scheduler + Clients int + Tables int + ChildrenPerTable int + Columns int + ColumnResolvers int // number of columns with custom resolvers + ResourcesPerTable int + ResourcesPerPage int + NoPreResourceResolver bool + Concurrency uint64 +} + +func (s *BenchmarkScenario) SetDefaults() { + if s.Clients == 0 { + s.Clients = 1 + } + if s.Tables == 0 { + s.Tables = 1 + } + if s.Columns == 0 { + s.Columns = 10 + } + if s.ResourcesPerTable == 0 { + s.ResourcesPerTable = 100 + } + if s.ResourcesPerPage == 0 { + s.ResourcesPerPage = 10 + } +} + +type ClientTest interface { + Call(clientID, tableName string) error +} + +type Benchmark struct { + *BenchmarkScenario + + b *testing.B + tables []*schema.Table + plugin *Plugin + + apiCalls atomic.Int64 +} + +func NewBenchmark(b *testing.B, scenario BenchmarkScenario) *Benchmark { + scenario.SetDefaults() + sb := &Benchmark{ + BenchmarkScenario: &scenario, + b: b, + tables: nil, + plugin: nil, + } + sb.setup(b) + return sb +} + +func (s *Benchmark) setup(b *testing.B) { + createResolvers := func(tableName string) (schema.TableResolver, schema.RowResolver, schema.ColumnResolver) { + tableResolver := func(ctx context.Context, meta schema.ClientMeta, parent *schema.Resource, res chan<- any) error { + total := 0 + for total < s.ResourcesPerTable { + s.simulateAPICall(meta.ID(), tableName) + num := min(s.ResourcesPerPage, s.ResourcesPerTable-total) + resources := make([]struct { + Column1 string + }, num) + for i := 0; i < num; i++ { + resources[i] = struct { + Column1 string + }{ + Column1: "test-column", + } + } + res <- resources + total += num + } + return nil + } + preResourceResolver := func(ctx context.Context, meta schema.ClientMeta, resource *schema.Resource) error { + s.simulateAPICall(meta.ID(), tableName) + resource.Item = struct { + Column1 string + }{ + Column1: "test-pre", + } + return nil + } + columnResolver := func(ctx context.Context, meta schema.ClientMeta, resource *schema.Resource, c schema.Column) error { + s.simulateAPICall(meta.ID(), tableName) + return resource.Set(c.Name, "test") + } + return tableResolver, preResourceResolver, columnResolver + } + + s.tables = make([]*schema.Table, s.Tables) + for i := 0; i < s.Tables; i++ { + tableResolver, preResourceResolver, columnResolver := createResolvers(fmt.Sprintf("table%d", i)) + columns := make([]schema.Column, s.Columns) + for u := 0; u < s.Columns; u++ { + columns[u] = schema.Column{ + Name: fmt.Sprintf("column%d", u), + Type: arrow.BinaryTypes.String, + } + if u < s.ColumnResolvers { + columns[u].Resolver = columnResolver + } + } + relations := make([]*schema.Table, s.ChildrenPerTable) + for u := 0; u < s.ChildrenPerTable; u++ { + relations[u] = &schema.Table{ + Name: fmt.Sprintf("table%d_child%d", i, u), + Columns: columns, + Resolver: tableResolver, + } + if !s.NoPreResourceResolver { + relations[u].PreResourceResolver = preResourceResolver + } + } + s.tables[i] = &schema.Table{ + Name: fmt.Sprintf("table%d", i), + Columns: columns, + Relations: relations, + Resolver: tableResolver, + Multiplex: nMultiplexer(s.Clients), + } + if !s.NoPreResourceResolver { + s.tables[i].PreResourceResolver = preResourceResolver + } + for u := range relations { + relations[u].Parent = s.tables[i] + } + } + + plugin := NewPlugin( + "testPlugin", + "1.0.0", + s.tables, + newTestExecutionClient, + ) + plugin.SetLogger(zerolog.New(zerolog.NewTestWriter(b)).Level(zerolog.WarnLevel)) + s.plugin = plugin + s.b = b +} + +func (s *Benchmark) simulateAPICall(clientID, tableName string) { + for { + s.apiCalls.Add(1) + err := s.Client.Call(clientID, tableName) + if err == nil { + // if no error, we are done + break + } + // if error, we have to retry + // we simulate a random backoff + time.Sleep(time.Duration(rand.Intn(100)) * time.Millisecond) + } +} + +func min(a, b int) int { + if a < b { + return a + } + return b +} + +func (s *Benchmark) Run() { + for n := 0; n < s.b.N; n++ { + s.b.StopTimer() + ctx := context.Background() + spec := specs.Source{ + Name: "testSource", + Path: "cloudquery/testSource", + Tables: []string{"*"}, + Version: "v1.0.0", + Destinations: []string{"test"}, + Concurrency: s.Concurrency, + Scheduler: s.Scheduler, + } + if err := s.plugin.Init(ctx, spec); err != nil { + s.b.Fatal(err) + } + resources := make(chan *schema.Resource) + g, ctx := errgroup.WithContext(ctx) + g.Go(func() error { + defer close(resources) + return s.plugin.Sync(ctx, + time.Now(), + resources) + }) + s.b.StartTimer() + start := time.Now() + + totalResources := 0 + for range resources { + // read resources channel until empty + totalResources++ + } + if err := g.Wait(); err != nil { + s.b.Fatal(err) + } + + end := time.Now() + s.b.ReportMetric(0, "ns/op") // drop default ns/op output + s.b.ReportMetric(float64(totalResources)/(end.Sub(start).Seconds()), "resources/s") + + // Enable the below metrics for more verbose information about the scenario: + // s.b.ReportMetric(float64(s.apiCalls.Load())/(end.Sub(start).Seconds()), "api-calls/s") + // s.b.ReportMetric(float64(totalResources), "resources") + // s.b.ReportMetric(float64(s.apiCalls.Load()), "apiCalls") + } +} + +type benchmarkClient struct { + num int +} + +func (b benchmarkClient) ID() string { + return fmt.Sprintf("client%d", b.num) +} + +func nMultiplexer(n int) schema.Multiplexer { + return func(meta schema.ClientMeta) []schema.ClientMeta { + clients := make([]schema.ClientMeta, n) + for i := 0; i < n; i++ { + clients[i] = benchmarkClient{ + num: i, + } + } + return clients + } +} + +func BenchmarkDefaultConcurrencyDFS(b *testing.B) { + benchmarkWithScheduler(b, specs.SchedulerDFS) +} + +func BenchmarkDefaultConcurrencyRoundRobin(b *testing.B) { + benchmarkWithScheduler(b, specs.SchedulerRoundRobin) +} + +func benchmarkWithScheduler(b *testing.B, scheduler specs.Scheduler) { + b.ReportAllocs() + minTime := 1 * time.Millisecond + mean := 10 * time.Millisecond + stdDev := 100 * time.Millisecond + client := NewDefaultClient(minTime, mean, stdDev) + bs := BenchmarkScenario{ + Client: client, + Clients: 25, + Tables: 5, + Columns: 10, + ColumnResolvers: 1, + ResourcesPerTable: 100, + ResourcesPerPage: 50, + Scheduler: scheduler, + } + sb := NewBenchmark(b, bs) + sb.Run() +} + +func BenchmarkTablesWithChildrenDFS(b *testing.B) { + benchmarkTablesWithChildrenScheduler(b, specs.SchedulerDFS) +} + +func BenchmarkTablesWithChildrenRoundRobin(b *testing.B) { + benchmarkTablesWithChildrenScheduler(b, specs.SchedulerRoundRobin) +} + +func benchmarkTablesWithChildrenScheduler(b *testing.B, scheduler specs.Scheduler) { + b.ReportAllocs() + minTime := 1 * time.Millisecond + mean := 10 * time.Millisecond + stdDev := 100 * time.Millisecond + client := NewDefaultClient(minTime, mean, stdDev) + bs := BenchmarkScenario{ + Client: client, + Clients: 2, + Tables: 2, + ChildrenPerTable: 2, + Columns: 10, + ColumnResolvers: 1, + ResourcesPerTable: 100, + ResourcesPerPage: 50, + Scheduler: scheduler, + } + sb := NewBenchmark(b, bs) + sb.Run() +} + +type DefaultClient struct { + min, stdDev, mean time.Duration +} + +func NewDefaultClient(min, mean, stdDev time.Duration) *DefaultClient { + if min == 0 { + min = time.Millisecond + } + if mean == 0 { + mean = 10 * time.Millisecond + } + if stdDev == 0 { + stdDev = 100 * time.Millisecond + } + return &DefaultClient{ + min: min, + mean: mean, + stdDev: stdDev, + } +} + +func (c *DefaultClient) Call(_, _ string) error { + sample := int(rand.NormFloat64()*float64(c.stdDev) + float64(c.mean)) + duration := time.Duration(sample) + if duration < c.min { + duration = c.min + } + time.Sleep(duration) + return nil +} + +type RateLimitClient struct { + *DefaultClient + calls map[string][]time.Time + callsLock sync.Mutex + window time.Duration + maxCallsPerWindow int +} + +func NewRateLimitClient(min, mean, stdDev time.Duration, maxCallsPerWindow int, window time.Duration) *RateLimitClient { + return &RateLimitClient{ + DefaultClient: NewDefaultClient(min, mean, stdDev), + calls: map[string][]time.Time{}, + window: window, + maxCallsPerWindow: maxCallsPerWindow, + } +} + +func (r *RateLimitClient) Call(clientID, table string) error { + // this will sleep for the appropriate amount of time before responding + err := r.DefaultClient.Call(clientID, table) + if err != nil { + return err + } + + r.callsLock.Lock() + defer r.callsLock.Unlock() + + // limit the number of calls per window by table + key := table + + // remove calls from outside the call window + updated := make([]time.Time, 0, len(r.calls[key])) + for i := range r.calls[key] { + if time.Since(r.calls[key][i]) < r.window { + updated = append(updated, r.calls[key][i]) + } + } + + // return error if we've exceeded the max calls in the time window + if len(updated) >= r.maxCallsPerWindow { + return fmt.Errorf("rate limit exceeded") + } + + r.calls[key] = append(r.calls[key], time.Now()) + return nil +} + +// BenchmarkDefaultConcurrency represents a benchmark scenario where rate limiting is applied +// by the cloud provider. In this rate limiter, the limit is applied globally per table. +// This mirrors the behavior of GCP, where rate limiting is applied per project *token*, not +// per project. A good scheduler should spread the load across tables so that other tables can make +// progress while waiting for the rate limit to reset. +func BenchmarkTablesWithRateLimitingDFS(b *testing.B) { + benchmarkTablesWithRateLimitingScheduler(b, specs.SchedulerDFS) +} + +func BenchmarkTablesWithRateLimitingRoundRobin(b *testing.B) { + benchmarkTablesWithRateLimitingScheduler(b, specs.SchedulerRoundRobin) +} + +// In this benchmark, we set up a scenario where each table has a global rate limit of 1 call per 100ms. +// Every table requires 1 call to resolve, and has 10 clients. This means, at best, each table can resolve in 1 second. +// We have 100 such tables and a concurrency that allows 1000 calls at a time. A good scheduler for this scenario +// should be able to resolve all tables in a bit more than 1 second. +func benchmarkTablesWithRateLimitingScheduler(b *testing.B, scheduler specs.Scheduler) { + b.ReportAllocs() + minTime := 1 * time.Millisecond + mean := 1 * time.Millisecond + stdDev := 1 * time.Millisecond + maxCallsPerWindow := 1 + window := 100 * time.Millisecond + c := NewRateLimitClient(minTime, mean, stdDev, maxCallsPerWindow, window) + + bs := BenchmarkScenario{ + Client: c, + Scheduler: scheduler, + Clients: 10, + Tables: 100, + ChildrenPerTable: 0, + Columns: 10, + ColumnResolvers: 0, + ResourcesPerTable: 1, + ResourcesPerPage: 1, + Concurrency: 1000, + NoPreResourceResolver: true, + } + sb := NewBenchmark(b, bs) + sb.Run() +} diff --git a/plugin/docs.go b/plugin/docs.go new file mode 100644 index 0000000000..5827e5edcf --- /dev/null +++ b/plugin/docs.go @@ -0,0 +1,242 @@ +package plugin + +import ( + "bytes" + "embed" + "encoding/json" + "fmt" + "os" + "path/filepath" + "regexp" + "sort" + "text/template" + + pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v0" + "github.com/cloudquery/plugin-sdk/v3/caser" + "github.com/cloudquery/plugin-sdk/v3/schema" +) + +//go:embed templates/*.go.tpl +var templatesFS embed.FS + +var reMatchNewlines = regexp.MustCompile(`\n{3,}`) +var reMatchHeaders = regexp.MustCompile(`(#{1,6}.+)\n+`) + +var DefaultTitleExceptions = map[string]string{ + // common abbreviations + "acl": "ACL", + "acls": "ACLs", + "api": "API", + "apis": "APIs", + "ca": "CA", + "cidr": "CIDR", + "cidrs": "CIDRs", + "db": "DB", + "dbs": "DBs", + "dhcp": "DHCP", + "iam": "IAM", + "iot": "IOT", + "ip": "IP", + "ips": "IPs", + "ipv4": "IPv4", + "ipv6": "IPv6", + "mfa": "MFA", + "ml": "ML", + "oauth": "OAuth", + "vpc": "VPC", + "vpcs": "VPCs", + "vpn": "VPN", + "vpns": "VPNs", + "waf": "WAF", + "wafs": "WAFs", + + // cloud providers + "aws": "AWS", + "gcp": "GCP", +} + +func DefaultTitleTransformer(table *schema.Table) string { + if table.Title != "" { + return table.Title + } + csr := caser.New(caser.WithCustomExceptions(DefaultTitleExceptions)) + return csr.ToTitle(table.Name) +} + +func sortTables(tables schema.Tables) { + sort.SliceStable(tables, func(i, j int) bool { + return tables[i].Name < tables[j].Name + }) + + for _, table := range tables { + sortTables(table.Relations) + } +} + +type templateData struct { + PluginName string + Tables schema.Tables +} + +// GeneratePluginDocs creates table documentation for the source plugin based on its list of tables +func (p *Plugin) GeneratePluginDocs(tables schema.Tables, dir string, format pbPlugin.GenDocs_FORMAT) error { + if err := os.MkdirAll(dir, os.ModePerm); err != nil { + return err + } + + setDestinationManagedCqColumns(tables) + + sortedTables := make(schema.Tables, 0, len(tables)) + for _, t := range tables { + sortedTables = append(sortedTables, t.Copy(nil)) + } + sortTables(sortedTables) + + switch format { + case pbPlugin.GenDocs_FORMAT_MARKDOWN: + return p.renderTablesAsMarkdown(dir, p.name, sortedTables) + case pbPlugin.GenDocs_FORMAT_JSON: + return p.renderTablesAsJSON(dir, sortedTables) + default: + return fmt.Errorf("unsupported format: %v", format) + } +} + +// setDestinationManagedCqColumns overwrites or adds the CQ columns that are managed by the destination plugins (_cq_sync_time, _cq_source_name). +func setDestinationManagedCqColumns(tables []*schema.Table) { + for _, table := range tables { + table.OverwriteOrAddColumn(&schema.CqSyncTimeColumn) + table.OverwriteOrAddColumn(&schema.CqSourceNameColumn) + setDestinationManagedCqColumns(table.Relations) + } +} + +type jsonTable struct { + Name string `json:"name"` + Title string `json:"title"` + Description string `json:"description"` + Columns []jsonColumn `json:"columns"` + Relations []jsonTable `json:"relations"` +} + +type jsonColumn struct { + Name string `json:"name"` + Type string `json:"type"` + IsPrimaryKey bool `json:"is_primary_key,omitempty"` + IsIncrementalKey bool `json:"is_incremental_key,omitempty"` +} + +func (p *Plugin) renderTablesAsJSON(dir string, tables schema.Tables) error { + jsonTables := p.jsonifyTables(tables) + buffer := &bytes.Buffer{} + m := json.NewEncoder(buffer) + m.SetIndent("", " ") + m.SetEscapeHTML(false) + err := m.Encode(jsonTables) + if err != nil { + return err + } + outputPath := filepath.Join(dir, "__tables.json") + return os.WriteFile(outputPath, buffer.Bytes(), 0644) +} + +func (p *Plugin) jsonifyTables(tables schema.Tables) []jsonTable { + jsonTables := make([]jsonTable, len(tables)) + for i, table := range tables { + jsonColumns := make([]jsonColumn, len(table.Columns)) + for c, col := range table.Columns { + jsonColumns[c] = jsonColumn{ + Name: col.Name, + Type: col.Type.String(), + IsPrimaryKey: col.PrimaryKey, + IsIncrementalKey: col.IncrementalKey, + } + } + jsonTables[i] = jsonTable{ + Name: table.Name, + Title: p.titleTransformer(table), + Description: table.Description, + Columns: jsonColumns, + Relations: p.jsonifyTables(table.Relations), + } + } + return jsonTables +} + +func (p *Plugin) renderTablesAsMarkdown(dir string, pluginName string, tables schema.Tables) error { + for _, table := range tables { + if err := p.renderAllTables(table, dir); err != nil { + return err + } + } + t, err := template.New("all_tables.md.go.tpl").Funcs(template.FuncMap{ + "indentToDepth": indentToDepth, + }).ParseFS(templatesFS, "templates/all_tables*.md.go.tpl") + if err != nil { + return fmt.Errorf("failed to parse template for README.md: %v", err) + } + + var b bytes.Buffer + if err := t.Execute(&b, templateData{PluginName: pluginName, Tables: tables}); err != nil { + return fmt.Errorf("failed to execute template: %v", err) + } + content := formatMarkdown(b.String()) + outputPath := filepath.Join(dir, "README.md") + f, err := os.Create(outputPath) + if err != nil { + return fmt.Errorf("failed to create file %v: %v", outputPath, err) + } + f.WriteString(content) + return nil +} + +func (p *Plugin) renderAllTables(t *schema.Table, dir string) error { + if err := p.renderTable(t, dir); err != nil { + return err + } + for _, r := range t.Relations { + if err := p.renderAllTables(r, dir); err != nil { + return err + } + } + return nil +} + +func (p *Plugin) renderTable(table *schema.Table, dir string) error { + t := template.New("").Funcs(map[string]any{ + "title": p.titleTransformer, + }) + t, err := t.New("table.md.go.tpl").ParseFS(templatesFS, "templates/table.md.go.tpl") + if err != nil { + return fmt.Errorf("failed to parse template: %v", err) + } + + outputPath := filepath.Join(dir, fmt.Sprintf("%s.md", table.Name)) + + var b bytes.Buffer + if err := t.Execute(&b, table); err != nil { + return fmt.Errorf("failed to execute template: %v", err) + } + content := formatMarkdown(b.String()) + f, err := os.Create(outputPath) + if err != nil { + return fmt.Errorf("failed to create file %v: %v", outputPath, err) + } + f.WriteString(content) + return f.Close() +} + +func formatMarkdown(s string) string { + s = reMatchNewlines.ReplaceAllString(s, "\n\n") + return reMatchHeaders.ReplaceAllString(s, `$1`+"\n\n") +} + +func indentToDepth(table *schema.Table) string { + s := "" + t := table + for t.Parent != nil { + s += " " + t = t.Parent + } + return s +} diff --git a/plugin/docs_test.go b/plugin/docs_test.go new file mode 100644 index 0000000000..44e7b34afd --- /dev/null +++ b/plugin/docs_test.go @@ -0,0 +1,164 @@ +//go:build !windows + +package plugin + +import ( + "os" + "path" + "testing" + + "github.com/apache/arrow/go/v13/arrow" + "github.com/bradleyjkemp/cupaloy/v2" + "github.com/cloudquery/plugin-sdk/v3/schema" + "github.com/cloudquery/plugin-sdk/v3/types" + "github.com/stretchr/testify/require" +) + +var testTables = []*schema.Table{ + { + Name: "test_table", + Description: "Description for test table", + Columns: []schema.Column{ + { + Name: "int_col", + Type: arrow.PrimitiveTypes.Int64, + }, + { + Name: "id_col", + Type: arrow.PrimitiveTypes.Int64, + PrimaryKey: true, + }, + { + Name: "id_col2", + Type: arrow.PrimitiveTypes.Int64, + PrimaryKey: true, + }, + { + Name: "json_col", + Type: types.ExtensionTypes.JSON, + }, + { + Name: "list_col", + Type: arrow.ListOf(arrow.PrimitiveTypes.Int64), + }, + { + Name: "map_col", + Type: arrow.MapOf(arrow.BinaryTypes.String, arrow.PrimitiveTypes.Int64), + }, + { + Name: "struct_col", + Type: arrow.StructOf(arrow.Field{Name: "string_field", Type: arrow.BinaryTypes.String}, arrow.Field{Name: "int_field", Type: arrow.PrimitiveTypes.Int64}), + }, + }, + Relations: []*schema.Table{ + { + Name: "relation_table", + Description: "Description for relational table", + Columns: []schema.Column{ + { + Name: "string_col", + Type: arrow.BinaryTypes.String, + }, + }, + Relations: []*schema.Table{ + { + Name: "relation_relation_table_b", + Description: "Description for relational table's relation", + Columns: []schema.Column{ + { + Name: "string_col", + Type: arrow.BinaryTypes.String, + }, + }, + }, + { + Name: "relation_relation_table_a", + Description: "Description for relational table's relation", + Columns: []schema.Column{ + { + Name: "string_col", + Type: arrow.BinaryTypes.String, + }, + }, + }, + }, + }, + { + Name: "relation_table2", + Description: "Description for second relational table", + Columns: []schema.Column{ + { + Name: "string_col", + Type: arrow.BinaryTypes.String, + }, + }, + }, + }, + }, + { + Name: "incremental_table", + Description: "Description for incremental table", + IsIncremental: true, + Columns: []schema.Column{ + { + Name: "int_col", + Type: arrow.PrimitiveTypes.Int64, + }, + { + Name: "id_col", + Type: arrow.PrimitiveTypes.Int64, + PrimaryKey: true, + IncrementalKey: true, + }, + { + Name: "id_col2", + Type: arrow.PrimitiveTypes.Int64, + IncrementalKey: true, + }, + }, + }, +} + +func TestGeneratePluginDocs(t *testing.T) { + p := NewPlugin("test", "v1.0.0", testTables, newTestExecutionClient) + + cup := cupaloy.New(cupaloy.SnapshotSubdirectory("testdata")) + + t.Run("Markdown", func(t *testing.T) { + tmpdir := t.TempDir() + + err := p.GeneratePluginDocs(tmpdir, "markdown") + if err != nil { + t.Fatalf("unexpected error calling GeneratePluginDocs: %v", err) + } + + expectFiles := []string{"test_table.md", "relation_table.md", "relation_relation_table_a.md", "relation_relation_table_b.md", "incremental_table.md", "README.md"} + for _, exp := range expectFiles { + t.Run(exp, func(t *testing.T) { + output := path.Join(tmpdir, exp) + got, err := os.ReadFile(output) + require.NoError(t, err) + cup.SnapshotT(t, got) + }) + } + }) + + t.Run("JSON", func(t *testing.T) { + tmpdir := t.TempDir() + + err := p.GeneratePluginDocs(tmpdir, "json") + if err != nil { + t.Fatalf("unexpected error calling GeneratePluginDocs: %v", err) + } + + expectFiles := []string{"__tables.json"} + for _, exp := range expectFiles { + t.Run(exp, func(t *testing.T) { + output := path.Join(tmpdir, exp) + got, err := os.ReadFile(output) + require.NoError(t, err) + cup.SnapshotT(t, got) + }) + } + }) +} diff --git a/plugin/metrics.go b/plugin/metrics.go new file mode 100644 index 0000000000..182bc243a4 --- /dev/null +++ b/plugin/metrics.go @@ -0,0 +1,125 @@ +package plugin + +import ( + "sync/atomic" + "time" + + "github.com/cloudquery/plugin-sdk/v3/schema" +) + +type Metrics struct { + TableClient map[string]map[string]*TableClientMetrics +} + +type TableClientMetrics struct { + Resources uint64 + Errors uint64 + Panics uint64 + StartTime time.Time + EndTime time.Time +} + +func (s *TableClientMetrics) Equal(other *TableClientMetrics) bool { + return s.Resources == other.Resources && s.Errors == other.Errors && s.Panics == other.Panics +} + +// Equal compares to stats. Mostly useful in testing +func (s *Metrics) Equal(other *Metrics) bool { + for table, clientStats := range s.TableClient { + for client, stats := range clientStats { + if _, ok := other.TableClient[table]; !ok { + return false + } + if _, ok := other.TableClient[table][client]; !ok { + return false + } + if !stats.Equal(other.TableClient[table][client]) { + return false + } + } + } + for table, clientStats := range other.TableClient { + for client, stats := range clientStats { + if _, ok := s.TableClient[table]; !ok { + return false + } + if _, ok := s.TableClient[table][client]; !ok { + return false + } + if !stats.Equal(s.TableClient[table][client]) { + return false + } + } + } + return true +} + +func (s *Metrics) initWithClients(table *schema.Table, clients []schema.ClientMeta) { + s.TableClient[table.Name] = make(map[string]*TableClientMetrics, len(clients)) + for _, client := range clients { + s.TableClient[table.Name][client.ID()] = &TableClientMetrics{} + } + for _, relation := range table.Relations { + s.initWithClients(relation, clients) + } +} + +func (s *Metrics) TotalErrors() uint64 { + var total uint64 + for _, clientMetrics := range s.TableClient { + for _, metrics := range clientMetrics { + total += metrics.Errors + } + } + return total +} + +func (s *Metrics) TotalErrorsAtomic() uint64 { + var total uint64 + for _, clientMetrics := range s.TableClient { + for _, metrics := range clientMetrics { + total += atomic.LoadUint64(&metrics.Errors) + } + } + return total +} + +func (s *Metrics) TotalPanics() uint64 { + var total uint64 + for _, clientMetrics := range s.TableClient { + for _, metrics := range clientMetrics { + total += metrics.Panics + } + } + return total +} + +func (s *Metrics) TotalPanicsAtomic() uint64 { + var total uint64 + for _, clientMetrics := range s.TableClient { + for _, metrics := range clientMetrics { + total += atomic.LoadUint64(&metrics.Panics) + } + } + return total +} + +func (s *Metrics) TotalResources() uint64 { + var total uint64 + for _, clientMetrics := range s.TableClient { + for _, metrics := range clientMetrics { + total += metrics.Resources + } + } + return total +} + +func (s *Metrics) TotalResourcesAtomic() uint64 { + var total uint64 + for _, clientMetrics := range s.TableClient { + for _, metrics := range clientMetrics { + total += atomic.LoadUint64(&metrics.Resources) + } + } + return total +} diff --git a/plugin/metrics_test.go b/plugin/metrics_test.go new file mode 100644 index 0000000000..a566edee5d --- /dev/null +++ b/plugin/metrics_test.go @@ -0,0 +1,37 @@ +package plugin + +import "testing" + +func TestMetrics(t *testing.T) { + s := &Metrics{ + TableClient: make(map[string]map[string]*TableClientMetrics), + } + s.TableClient["test_table"] = make(map[string]*TableClientMetrics) + s.TableClient["test_table"]["testExecutionClient"] = &TableClientMetrics{ + Resources: 1, + Errors: 2, + Panics: 3, + } + if s.TotalResources() != 1 { + t.Fatal("expected 1 resource") + } + if s.TotalErrors() != 2 { + t.Fatal("expected 2 error") + } + if s.TotalPanics() != 3 { + t.Fatal("expected 3 panics") + } + + other := &Metrics{ + TableClient: make(map[string]map[string]*TableClientMetrics), + } + other.TableClient["test_table"] = make(map[string]*TableClientMetrics) + other.TableClient["test_table"]["testExecutionClient"] = &TableClientMetrics{ + Resources: 1, + Errors: 2, + Panics: 3, + } + if !s.Equal(other) { + t.Fatal("expected metrics to be equal") + } +} diff --git a/plugin/options.go b/plugin/options.go new file mode 100644 index 0000000000..1290b7cd56 --- /dev/null +++ b/plugin/options.go @@ -0,0 +1,46 @@ +package plugin + +import ( + "context" + + "github.com/cloudquery/plugin-sdk/v3/schema" +) + +type GetTables func(ctx context.Context, c Client) (schema.Tables, error) + +type Option func(*Plugin) + +// WithDynamicTableOption allows the plugin to return list of tables after call to New +func WithDynamicTableOption(getDynamicTables GetTables) Option { + return func(p *Plugin) { + p.getDynamicTables = getDynamicTables + } +} + +// WithNoInternalColumns won't add internal columns (_cq_id, _cq_parent_cq_id) to the plugin tables +func WithNoInternalColumns() Option { + return func(p *Plugin) { + p.internalColumns = false + } +} + +func WithUnmanaged() Option { + return func(p *Plugin) { + p.unmanaged = true + } +} + +// WithTitleTransformer allows the plugin to control how table names get turned into titles for the +// generated documentation. +func WithTitleTransformer(t func(*schema.Table) string) Option { + return func(p *Plugin) { + p.titleTransformer = t + } +} + + +func WithStaticTables(tables schema.Tables) Option { + return func(p *Plugin) { + p.staticTables = tables + } +} \ No newline at end of file diff --git a/plugin/plugin.go b/plugin/plugin.go new file mode 100644 index 0000000000..e1efa19cb1 --- /dev/null +++ b/plugin/plugin.go @@ -0,0 +1,326 @@ +package plugin + +import ( + "context" + "fmt" + "sync" + "time" + + "github.com/apache/arrow/go/v13/arrow" + "github.com/cloudquery/plugin-pb-go/specs" + "github.com/cloudquery/plugin-sdk/v3/backend" + "github.com/cloudquery/plugin-sdk/v3/caser" + "github.com/cloudquery/plugin-sdk/v3/schema" + "github.com/rs/zerolog" + "golang.org/x/sync/semaphore" + + pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v0" +) + +type Options struct { + Backend backend.Backend +} + +type NewExecutionClientFunc func(context.Context, zerolog.Logger, specs.Source, Options) (schema.ClientMeta, error) + +type NewClientFunc func(context.Context, zerolog.Logger, pbPlugin.Spec) (Client, error) + +type UnmanagedClient interface { + schema.ClientMeta + Sync(ctx context.Context, metrics *Metrics, syncSpec pbPlugin.SyncSpec, res chan<- *schema.Resource) error +} + +type Client interface { + Sync(ctx context.Context, metrics *Metrics, res chan<- *schema.Resource) error + Migrate(ctx context.Context, tables schema.Tables) error + Write(ctx context.Context, tables schema.Tables, res <-chan arrow.Record) error + DeleteStale(ctx context.Context, tables schema.Tables, sourceName string, syncTime time.Time) error + Read(ctx context.Context, table *schema.Table, sourceName string, res chan<- arrow.Record) error +} + +type UnimplementedWriter struct{} + +func (UnimplementedWriter) WriteTableBatch(context.Context, *schema.Table, []arrow.Record) error { + return fmt.Errorf("not implemented") +} + +type UnimplementedSync struct{} + +func (UnimplementedSync) Sync(ctx context.Context, metrics *Metrics, res chan<- *schema.Resource) error { + return fmt.Errorf("not implemented") +} + +// Plugin is the base structure required to pass to sdk.serve +// We take a declarative approach to API here similar to Cobra +type Plugin struct { + // Name of plugin i.e aws,gcp, azure etc' + name string + // Version of the plugin + version string + // Called upon init call to validate and init configuration + newClient NewClientFunc + // dynamic table function if specified + getDynamicTables GetTables + // Tables are static tables that defined in compile time by the plugin + staticTables schema.Tables + // status sync metrics + metrics *Metrics + // Logger to call, this logger is passed to the serve.Serve Client, if not defined Serve will create one instead. + logger zerolog.Logger + // resourceSem is a semaphore that limits the number of concurrent resources being fetched + resourceSem *semaphore.Weighted + // tableSem is a semaphore that limits the number of concurrent tables being fetched + tableSems []*semaphore.Weighted + // maxDepth is the max depth of tables + maxDepth uint64 + // caser + caser *caser.Caser + // mu is a mutex that limits the number of concurrent init/syncs (can only be one at a time) + mu sync.Mutex + + // client is the initialized session client + client Client + // sessionTables are the + sessionTables schema.Tables + // backend is the backend used to store the cursor state + backend backend.Backend + // spec is the spec the client was initialized with + spec pbPlugin.Spec + // NoInternalColumns if set to true will not add internal columns to tables such as _cq_id and _cq_parent_id + // useful for sources such as PostgreSQL and other databases + internalColumns bool + // unmanaged if set to true then the plugin will call Sync directly and not use the scheduler + unmanaged bool + // titleTransformer allows the plugin to control how table names get turned into titles for generated documentation + titleTransformer func(*schema.Table) string + syncTime time.Time +} + +const ( + maxAllowedDepth = 4 +) + +// Add internal columns +func (p *Plugin) addInternalColumns(tables []*schema.Table) error { + for _, table := range tables { + if c := table.Column("_cq_id"); c != nil { + return fmt.Errorf("table %s already has column _cq_id", table.Name) + } + cqID := schema.CqIDColumn + if len(table.PrimaryKeys()) == 0 { + cqID.PrimaryKey = true + } + cqSourceName := schema.CqSourceNameColumn + cqSyncTime := schema.CqSyncTimeColumn + cqSourceName.Resolver = func(_ context.Context, _ schema.ClientMeta, resource *schema.Resource, c schema.Column) error { + return resource.Set(c.Name, p.spec.Name) + } + cqSyncTime.Resolver = func(_ context.Context, _ schema.ClientMeta, resource *schema.Resource, c schema.Column) error { + return resource.Set(c.Name, p.syncTime) + } + + table.Columns = append([]schema.Column{cqSourceName, cqSyncTime, cqID, schema.CqParentIDColumn}, table.Columns...) + if err := p.addInternalColumns(table.Relations); err != nil { + return err + } + } + return nil +} + +// Set parent links on relational tables +func setParents(tables schema.Tables, parent *schema.Table) { + for _, table := range tables { + table.Parent = parent + setParents(table.Relations, table) + } +} + +// Apply transformations to tables +func transformTables(tables schema.Tables) error { + for _, table := range tables { + if table.Transform != nil { + if err := table.Transform(table); err != nil { + return fmt.Errorf("failed to transform table %s: %w", table.Name, err) + } + } + if err := transformTables(table.Relations); err != nil { + return err + } + } + return nil +} + +func maxDepth(tables schema.Tables) uint64 { + var depth uint64 + if len(tables) == 0 { + return 0 + } + for _, table := range tables { + newDepth := 1 + maxDepth(table.Relations) + if newDepth > depth { + depth = newDepth + } + } + return depth +} + +func NewPlugin(name string, version string, newClient NewClientFunc, options ...Option) *Plugin { + p := Plugin{ + name: name, + version: version, + internalColumns: true, + caser: caser.New(), + titleTransformer: DefaultTitleTransformer, + newClient: newClient, + } + for _, opt := range options { + opt(&p) + } + if p.staticTables != nil { + if p.internalColumns { + if err := p.addInternalColumns(p.staticTables); err != nil { + panic(err) + } + } + p.maxDepth = maxDepth(p.staticTables) + if p.maxDepth > maxAllowedDepth { + panic(fmt.Errorf("max depth of tables is %d, max allowed is %d", p.maxDepth, maxAllowedDepth)) + } + if err := p.validate(p.staticTables); err != nil { + panic(err) + } + } + + return &p +} + +// Name return the name of this plugin +func (p *Plugin) Name() string { + return p.name +} + +// Version returns the version of this plugin +func (p *Plugin) Version() string { + return p.version +} + + +func (p *Plugin) SetLogger(logger zerolog.Logger) { + p.logger = logger.With().Str("module", p.name+"-src").Logger() +} + +// Tables returns all tables supported by this source plugin +func (p *Plugin) StaticTables() schema.Tables { + return p.staticTables +} + +func (p *Plugin) HasDynamicTables() bool { + return p.getDynamicTables != nil +} + +func (p *Plugin) DynamicTables() schema.Tables { + return p.sessionTables +} + +func (p *Plugin) Read(ctx context.Context, table *schema.Table, sourceName string, res chan<- arrow.Record) error { + return p.client.Read(ctx, table, sourceName, res) +} + +func (p *Plugin) Metrics() *Metrics { + return p.metrics +} + +func (p *Plugin) Init(ctx context.Context, spec pbPlugin.Spec) error { + if !p.mu.TryLock() { + return fmt.Errorf("plugin already in use") + } + defer p.mu.Unlock() + + var err error + p.client, err = p.newClient(ctx, p.logger, spec) + if err != nil { + return fmt.Errorf("failed to initialize client: %w", err) + } + p.spec = spec + + return nil +} + +func (p *Plugin) Migrate(ctx context.Context, tables schema.Tables) error { + return p.client.Migrate(ctx, tables) +} + +func (p *Plugin) writeUnmanaged(ctx context.Context, _ specs.Source, tables schema.Tables, _ time.Time, res <-chan arrow.Record) error { + return p.client.Write(ctx, tables, res) +} + +func (p *Plugin) Write(ctx context.Context, sourceSpec pbPlugin.Spec, tables schema.Tables, syncTime time.Time, res <-chan arrow.Record) error { + syncTime = syncTime.UTC() + if err := p.client.Write(ctx, tables, res); err != nil { + return err + } + if p.spec.WriteSpec.WriteMode == pbPlugin.WRITE_MODE_WRITE_MODE_OVERWRITE_DELETE_STALE { + tablesToDelete := tables + if sourceSpec.BackendSpec != nil { + tablesToDelete = make(schema.Tables, 0, len(tables)) + for _, t := range tables { + if !t.IsIncremental { + tablesToDelete = append(tablesToDelete, t) + } + } + } + if err := p.DeleteStale(ctx, tablesToDelete, sourceSpec.Name, syncTime); err != nil { + return err + } + } + return nil +} + +func (p *Plugin) DeleteStale(ctx context.Context, tables schema.Tables, sourceName string, syncTime time.Time) error { + syncTime = syncTime.UTC() + return p.client.DeleteStale(ctx, tables, sourceName, syncTime) +} + +// Sync is syncing data from the requested tables in spec to the given channel +func (p *Plugin) Sync(ctx context.Context, syncTime time.Time, syncSpec pbPlugin.SyncSpec, res chan<- *schema.Resource) error { + if !p.mu.TryLock() { + return fmt.Errorf("plugin already in use") + } + defer p.mu.Unlock() + p.syncTime = syncTime + + startTime := time.Now() + if p.unmanaged { + unmanagedClient := p.client.(UnmanagedClient) + if err := unmanagedClient.Sync(ctx, p.metrics, syncSpec, res); err != nil { + return fmt.Errorf("failed to sync unmanaged client: %w", err) + } + } else { + switch syncSpec.Scheduler { + case pbPlugin.SyncSpec_SCHEDULER_DFS: + p.syncDfs(ctx, syncSpec, p.client, p.sessionTables, res) + case pbPlugin.SyncSpec_SCHEDULER_ROUND_ROBIN: + p.syncRoundRobin(ctx, syncSpec, p.client, p.sessionTables, res) + default: + return fmt.Errorf("unknown scheduler %s. Options are: %v", syncSpec.Scheduler, specs.AllSchedulers.String()) + } + } + + p.logger.Info().Uint64("resources", p.metrics.TotalResources()).Uint64("errors", p.metrics.TotalErrors()).Uint64("panics", p.metrics.TotalPanics()).TimeDiff("duration", time.Now(), startTime).Msg("sync finished") + return nil +} + +func (p *Plugin) Close(ctx context.Context) error { + if !p.mu.TryLock() { + return fmt.Errorf("plugin already in use") + } + defer p.mu.Unlock() + if p.backend != nil { + err := p.backend.Close(ctx) + if err != nil { + return fmt.Errorf("failed to close backend: %w", err) + } + p.backend = nil + } + return nil +} diff --git a/plugin/plugin_round_robin_test.go b/plugin/plugin_round_robin_test.go new file mode 100644 index 0000000000..9c4c094d6f --- /dev/null +++ b/plugin/plugin_round_robin_test.go @@ -0,0 +1,148 @@ +package plugin + +import ( + "context" + "fmt" + "sync" + "testing" + "time" + + "github.com/apache/arrow/go/v13/arrow" + "github.com/apache/arrow/go/v13/arrow/array" + pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v0" + "github.com/cloudquery/plugin-pb-go/specs" + "github.com/cloudquery/plugin-sdk/v3/schema" + "github.com/rs/zerolog" +) + +type testPluginClient struct { + memoryDB map[string][]arrow.Record + tables map[string]*schema.Table + memoryDBLock sync.RWMutex +} + +type testPluginSpec struct { + ConnectionString string `json:"connection_string"` +} + +func (c *testPluginClient) Sync(ctx context.Context, metrics *Metrics, res chan<- *schema.Resource) error { + return nil +} + +func (c *testPluginClient) Migrate(ctx context.Context, tables schema.Tables) error { + for _, table := range tables { + tableName := table.Name + memTable := c.memoryDB[tableName] + if memTable == nil { + c.memoryDB[tableName] = make([]arrow.Record, 0) + c.tables[tableName] = table + continue + } + + changes := table.GetChanges(c.tables[tableName]) + // memdb doesn't support any auto-migrate + if changes == nil { + continue + } + c.memoryDB[tableName] = make([]arrow.Record, 0) + c.tables[tableName] = table + } + return nil + return nil +} + +func (c *testPluginClient) Write(ctx context.Context, tables schema.Tables, resources <-chan arrow.Record) error { + for resource := range resources { + c.memoryDBLock.Lock() + sc := resource.Schema() + tableName, ok := sc.Metadata().GetValue(schema.MetadataTableName) + if !ok { + return fmt.Errorf("table name not found in schema metadata") + } + table := c.tables[tableName] + if c.spec.WriteMode == specs.WriteModeAppend { + c.memoryDB[tableName] = append(c.memoryDB[tableName], resource) + } else { + c.overwrite(table, resource) + } + c.memoryDBLock.Unlock() + } + return nil +} + +func (c *testPluginClient) overwrite(table *schema.Table, data arrow.Record) { + pksIndex := table.PrimaryKeysIndexes() + tableName := table.Name + for i, row := range c.memoryDB[tableName] { + found := true + for _, pkIndex := range pksIndex { + s1 := data.Column(pkIndex).String() + s2 := row.Column(pkIndex).String() + if s1 != s2 { + found = false + } + } + if found { + c.memoryDB[tableName] = append(c.memoryDB[tableName][:i], c.memoryDB[tableName][i+1:]...) + c.memoryDB[tableName] = append(c.memoryDB[tableName], data) + return + } + } + c.memoryDB[tableName] = append(c.memoryDB[tableName], data) +} + +func (c *testPluginClient) deleteStaleTable(_ context.Context, table *schema.Table, source string, syncTime time.Time) { + sourceColIndex := table.Columns.Index(schema.CqSourceNameColumn.Name) + syncColIndex := table.Columns.Index(schema.CqSyncTimeColumn.Name) + tableName := table.Name + var filteredTable []arrow.Record + for i, row := range c.memoryDB[tableName] { + if row.Column(sourceColIndex).(*array.String).Value(0) == source { + rowSyncTime := row.Column(syncColIndex).(*array.Timestamp).Value(0).ToTime(arrow.Microsecond).UTC() + if !rowSyncTime.Before(syncTime) { + filteredTable = append(filteredTable, c.memoryDB[tableName][i]) + } + } + } + c.memoryDB[tableName] = filteredTable +} + +func (c *testPluginClient) DeleteStale(ctx context.Context, tables schema.Tables, sourceName string, syncTime time.Time) error { + return nil +} + +func (c *testPluginClient) Read(ctx context.Context, table *schema.Table, sourceName string, res chan<- arrow.Record) error { + tableName := table.Name + if c.memoryDB[tableName] == nil { + return nil + } + sourceColIndex := table.Columns.Index(schema.CqSourceNameColumn.Name) + if sourceColIndex == -1 { + return fmt.Errorf("table %s doesn't have source column", tableName) + } + var sortedRes []arrow.Record + c.memoryDBLock.RLock() + for _, row := range c.memoryDB[tableName] { + arr := row.Column(sourceColIndex) + if arr.(*array.String).Value(0) == sourceName { + sortedRes = append(sortedRes, row) + } + } + c.memoryDBLock.RUnlock() + + for _, row := range sortedRes { + res <- row + } + return nil +} + +func NewTestPluginClient(context.Context, zerolog.Logger, pbPlugin.Spec) (Client, error) { + return &testPluginClient{ + memoryDB: make(map[string][]arrow.Record), + tables: make(map[string]*schema.Table), + }, nil +} + +func TestPluginRoundRobin(t *testing.T) { + p := NewPlugin("test", "v0.0.0", NewTestPluginClient) +} \ No newline at end of file diff --git a/plugin/plugin_test.go b/plugin/plugin_test.go new file mode 100644 index 0000000000..16afc7338c --- /dev/null +++ b/plugin/plugin_test.go @@ -0,0 +1,470 @@ +package plugin + +import ( + "context" + "testing" + "time" + + "github.com/apache/arrow/go/v13/arrow" + "github.com/cloudquery/plugin-pb-go/specs" + "github.com/cloudquery/plugin-sdk/v3/scalar" + "github.com/cloudquery/plugin-sdk/v3/schema" + "github.com/cloudquery/plugin-sdk/v3/transformers" + "github.com/google/go-cmp/cmp" + "github.com/google/uuid" + "github.com/rs/zerolog" + "github.com/stretchr/testify/assert" + "golang.org/x/sync/errgroup" +) + +type testExecutionClient struct{} + +var _ schema.ClientMeta = &testExecutionClient{} + +var deterministicStableUUID = uuid.MustParse("c25355aab52c5b70a4e0c9991f5a3b87") +var randomStableUUID = uuid.MustParse("00000000000040008000000000000000") + +var testSyncTime = time.Now() + +func testResolverSuccess(_ context.Context, _ schema.ClientMeta, _ *schema.Resource, res chan<- any) error { + res <- map[string]any{ + "TestColumn": 3, + } + return nil +} + +func testResolverPanic(context.Context, schema.ClientMeta, *schema.Resource, chan<- any) error { + panic("Resolver") +} + +func testPreResourceResolverPanic(context.Context, schema.ClientMeta, *schema.Resource) error { + panic("PreResourceResolver") +} + +func testColumnResolverPanic(context.Context, schema.ClientMeta, *schema.Resource, schema.Column) error { + panic("ColumnResolver") +} + +func testTableSuccess() *schema.Table { + return &schema.Table{ + Name: "test_table_success", + Resolver: testResolverSuccess, + Columns: []schema.Column{ + { + Name: "test_column", + Type: arrow.PrimitiveTypes.Int64, + }, + }, + } +} + +func testTableSuccessWithPK() *schema.Table { + return &schema.Table{ + Name: "test_table_success", + Resolver: testResolverSuccess, + Columns: []schema.Column{ + { + Name: "test_column", + Type: arrow.PrimitiveTypes.Int64, + PrimaryKey: true, + }, + }, + } +} + +func testTableResolverPanic() *schema.Table { + return &schema.Table{ + Name: "test_table_resolver_panic", + Resolver: testResolverPanic, + Columns: []schema.Column{ + { + Name: "test_column", + Type: arrow.PrimitiveTypes.Int64, + }, + }, + } +} + +func testTablePreResourceResolverPanic() *schema.Table { + return &schema.Table{ + Name: "test_table_pre_resource_resolver_panic", + PreResourceResolver: testPreResourceResolverPanic, + Resolver: testResolverSuccess, + Columns: []schema.Column{ + { + Name: "test_column", + Type: arrow.PrimitiveTypes.Int64, + }, + }, + } +} + +func testTableColumnResolverPanic() *schema.Table { + return &schema.Table{ + Name: "test_table_column_resolver_panic", + Resolver: testResolverSuccess, + Columns: []schema.Column{ + { + Name: "test_column", + Type: arrow.PrimitiveTypes.Int64, + }, + { + Name: "test_column1", + Type: arrow.PrimitiveTypes.Int64, + Resolver: testColumnResolverPanic, + }, + }, + } +} + +func testTableRelationSuccess() *schema.Table { + return &schema.Table{ + Name: "test_table_relation_success", + Resolver: testResolverSuccess, + Columns: []schema.Column{ + { + Name: "test_column", + Type: arrow.PrimitiveTypes.Int64, + }, + }, + Relations: []*schema.Table{ + testTableSuccess(), + }, + } +} + +func (*testExecutionClient) ID() string { + return "testExecutionClient" +} + +func newTestExecutionClient(context.Context, zerolog.Logger, specs.Source, Options) (schema.ClientMeta, error) { + return &testExecutionClient{}, nil +} + +type syncTestCase struct { + table *schema.Table + stats Metrics + data []scalar.Vector + deterministicCQID bool +} + +var syncTestCases = []syncTestCase{ + { + table: testTableSuccess(), + stats: Metrics{ + TableClient: map[string]map[string]*TableClientMetrics{ + "test_table_success": { + "testExecutionClient": { + Resources: 1, + }, + }, + }, + }, + data: []scalar.Vector{ + { + &scalar.String{Value: "testSource", Valid: true}, + &scalar.Timestamp{Value: testSyncTime, Valid: true}, + &scalar.UUID{Value: randomStableUUID, Valid: true}, + &scalar.UUID{}, + &scalar.Int64{Value: 3, Valid: true}, + }, + }, + }, + { + table: testTableResolverPanic(), + stats: Metrics{ + TableClient: map[string]map[string]*TableClientMetrics{ + "test_table_resolver_panic": { + "testExecutionClient": { + Panics: 1, + }, + }, + }, + }, + data: nil, + }, + { + table: testTablePreResourceResolverPanic(), + stats: Metrics{ + TableClient: map[string]map[string]*TableClientMetrics{ + "test_table_pre_resource_resolver_panic": { + "testExecutionClient": { + Panics: 1, + }, + }, + }, + }, + data: nil, + }, + + { + table: testTableRelationSuccess(), + stats: Metrics{ + TableClient: map[string]map[string]*TableClientMetrics{ + "test_table_relation_success": { + "testExecutionClient": { + Resources: 1, + }, + }, + "test_table_success": { + "testExecutionClient": { + Resources: 1, + }, + }, + }, + }, + data: []scalar.Vector{ + { + &scalar.String{Value: "testSource", Valid: true}, + &scalar.Timestamp{Value: testSyncTime, Valid: true}, + &scalar.UUID{Value: randomStableUUID, Valid: true}, + &scalar.UUID{}, + &scalar.Int64{Value: 3, Valid: true}, + }, + { + &scalar.String{Value: "testSource", Valid: true}, + &scalar.Timestamp{Value: testSyncTime, Valid: true}, + &scalar.UUID{Value: randomStableUUID, Valid: true}, + &scalar.UUID{Value: randomStableUUID, Valid: true}, + &scalar.Int64{Value: 3, Valid: true}, + }, + }, + }, + { + table: testTableSuccess(), + stats: Metrics{ + TableClient: map[string]map[string]*TableClientMetrics{ + "test_table_success": { + "testExecutionClient": { + Resources: 1, + }, + }, + }, + }, + data: []scalar.Vector{ + { + &scalar.String{Value: "testSource", Valid: true}, + &scalar.Timestamp{Value: testSyncTime, Valid: true}, + &scalar.UUID{Value: randomStableUUID, Valid: true}, + &scalar.UUID{}, + &scalar.Int64{Value: 3, Valid: true}, + }, + }, + deterministicCQID: true, + }, + { + table: testTableColumnResolverPanic(), + stats: Metrics{ + TableClient: map[string]map[string]*TableClientMetrics{ + "test_table_column_resolver_panic": { + "testExecutionClient": { + Panics: 1, + Resources: 1, + }, + }, + }, + }, + data: []scalar.Vector{ + { + &scalar.String{Value: "testSource", Valid: true}, + &scalar.Timestamp{Value: testSyncTime, Valid: true}, + &scalar.UUID{Value: randomStableUUID, Valid: true}, + &scalar.UUID{}, + &scalar.Int64{Value: 3, Valid: true}, + &scalar.Int64{}, + }, + }, + deterministicCQID: true, + }, + { + table: testTableRelationSuccess(), + stats: Metrics{ + TableClient: map[string]map[string]*TableClientMetrics{ + "test_table_relation_success": { + "testExecutionClient": { + Resources: 1, + }, + }, + "test_table_success": { + "testExecutionClient": { + Resources: 1, + }, + }, + }, + }, + data: []scalar.Vector{ + { + &scalar.String{Value: "testSource", Valid: true}, + &scalar.Timestamp{Value: testSyncTime, Valid: true}, + &scalar.UUID{Value: randomStableUUID, Valid: true}, + &scalar.UUID{}, + &scalar.Int64{Value: 3, Valid: true}, + }, + { + &scalar.String{Value: "testSource", Valid: true}, + &scalar.Timestamp{Value: testSyncTime, Valid: true}, + &scalar.UUID{Value: randomStableUUID, Valid: true}, + &scalar.UUID{Value: randomStableUUID, Valid: true}, + &scalar.Int64{Value: 3, Valid: true}, + }, + }, + deterministicCQID: true, + }, + { + table: testTableSuccessWithPK(), + stats: Metrics{ + TableClient: map[string]map[string]*TableClientMetrics{ + "test_table_success": { + "testExecutionClient": { + Resources: 1, + }, + }, + }, + }, + data: []scalar.Vector{ + { + &scalar.String{Value: "testSource", Valid: true}, + &scalar.Timestamp{Value: testSyncTime, Valid: true}, + &scalar.UUID{Value: deterministicStableUUID, Valid: true}, + &scalar.UUID{}, + &scalar.Int64{Value: 3, Valid: true}, + }, + }, + deterministicCQID: true, + }, +} + +type testRand struct{} + +func (testRand) Read(p []byte) (n int, err error) { + for i := range p { + p[i] = byte(0) + } + return len(p), nil +} + +func TestSync(t *testing.T) { + uuid.SetRand(testRand{}) + for _, scheduler := range specs.AllSchedulers { + for _, tc := range syncTestCases { + tc := tc + tc.table = tc.table.Copy(nil) + t.Run(tc.table.Name+"_"+scheduler.String(), func(t *testing.T) { + testSyncTable(t, tc, scheduler, tc.deterministicCQID) + }) + } + } +} + +func testSyncTable(t *testing.T, tc syncTestCase, scheduler specs.Scheduler, deterministicCQID bool) { + ctx := context.Background() + tables := []*schema.Table{ + tc.table, + } + + plugin := NewPlugin( + "testSourcePlugin", + "1.0.0", + tables, + newTestExecutionClient, + ) + plugin.SetLogger(zerolog.New(zerolog.NewTestWriter(t))) + spec := specs.Source{ + Name: "testSource", + Path: "cloudquery/testSource", + Tables: []string{"*"}, + Version: "v1.0.0", + Destinations: []string{"test"}, + Concurrency: 1, // choose a very low value to check that we don't run into deadlocks + Scheduler: scheduler, + DeterministicCQID: deterministicCQID, + } + if err := plugin.Init(ctx, spec); err != nil { + t.Fatal(err) + } + + resources := make(chan *schema.Resource) + g, ctx := errgroup.WithContext(ctx) + g.Go(func() error { + defer close(resources) + return plugin.Sync(ctx, + testSyncTime, + resources) + }) + + var i int + for resource := range resources { + if tc.data == nil { + t.Fatalf("Unexpected resource %v", resource) + } + if i >= len(tc.data) { + t.Fatalf("expected %d resources. got %d", len(tc.data), i) + } + if !resource.GetValues().Equal(tc.data[i]) { + t.Fatalf("expected at i=%d: %v. got %v", i, tc.data[i], resource.GetValues()) + } + i++ + } + if len(tc.data) != i { + t.Fatalf("expected %d resources. got %d", len(tc.data), i) + } + + stats := plugin.Metrics() + if !tc.stats.Equal(stats) { + t.Fatalf("unexpected stats: %v", cmp.Diff(tc.stats, stats)) + } + if err := g.Wait(); err != nil { + t.Fatal(err) + } +} + +func TestIgnoredColumns(t *testing.T) { + validateResources(t, schema.Resources{{ + Item: struct{ A *string }{}, + Table: &schema.Table{ + Columns: schema.ColumnList{ + { + Name: "a", + Type: arrow.BinaryTypes.String, + IgnoreInTests: true, + }, + }, + }, + }}) +} + +var testTable struct { + PrimaryKey string + SecondaryKey string + TertiaryKey string + Quaternary string +} + +func TestNewPluginPrimaryKeys(t *testing.T) { + testTransforms := []struct { + transformerOptions []transformers.StructTransformerOption + resultKeys []string + }{ + { + transformerOptions: []transformers.StructTransformerOption{transformers.WithPrimaryKeys("PrimaryKey")}, + resultKeys: []string{"primary_key"}, + }, + { + transformerOptions: []transformers.StructTransformerOption{}, + resultKeys: []string{"_cq_id"}, + }, + } + for _, tc := range testTransforms { + tables := []*schema.Table{ + { + Name: "test_table", + Transform: transformers.TransformWithStruct( + &testTable, tc.transformerOptions..., + ), + }, + } + + plugin := NewPlugin("testSourcePlugin", "1.0.0", tables, newTestExecutionClient) + assert.Equal(t, tc.resultKeys, plugin.tables[0].PrimaryKeys()) + } +} diff --git a/plugin/scheduler.go b/plugin/scheduler.go new file mode 100644 index 0000000000..373147d194 --- /dev/null +++ b/plugin/scheduler.go @@ -0,0 +1,163 @@ +package plugin + +import ( + "context" + "errors" + "fmt" + "runtime/debug" + "sync" + "sync/atomic" + "time" + + "github.com/cloudquery/plugin-sdk/v3/schema" + "github.com/getsentry/sentry-go" + "github.com/rs/zerolog" + "github.com/thoas/go-funk" +) + +const ( + minTableConcurrency = 1 + minResourceConcurrency = 100 +) + +const periodicMetricLoggerInterval = 30 * time.Second + +func (p *Plugin) logTablesMetrics(tables schema.Tables, client schema.ClientMeta) { + clientName := client.ID() + for _, table := range tables { + metrics := p.metrics.TableClient[table.Name][clientName] + p.logger.Info().Str("table", table.Name).Str("client", clientName).Uint64("resources", metrics.Resources).Uint64("errors", metrics.Errors).Msg("table sync finished") + p.logTablesMetrics(table.Relations, client) + } +} + +func (p *Plugin) resolveResource(ctx context.Context, table *schema.Table, client schema.ClientMeta, parent *schema.Resource, item any) *schema.Resource { + var validationErr *schema.ValidationError + ctx, cancel := context.WithTimeout(ctx, 10*time.Minute) + defer cancel() + resource := schema.NewResourceData(table, parent, item) + objectStartTime := time.Now() + clientID := client.ID() + tableMetrics := p.metrics.TableClient[table.Name][clientID] + logger := p.logger.With().Str("table", table.Name).Str("client", clientID).Logger() + defer func() { + if err := recover(); err != nil { + stack := fmt.Sprintf("%s\n%s", err, string(debug.Stack())) + logger.Error().Interface("error", err).TimeDiff("duration", time.Now(), objectStartTime).Str("stack", stack).Msg("resource resolver finished with panic") + atomic.AddUint64(&tableMetrics.Panics, 1) + sentry.WithScope(func(scope *sentry.Scope) { + scope.SetTag("table", table.Name) + sentry.CurrentHub().CaptureMessage(stack) + }) + } + }() + if table.PreResourceResolver != nil { + if err := table.PreResourceResolver(ctx, client, resource); err != nil { + logger.Error().Err(err).Msg("pre resource resolver failed") + atomic.AddUint64(&tableMetrics.Errors, 1) + if errors.As(err, &validationErr) { + sentry.WithScope(func(scope *sentry.Scope) { + scope.SetTag("table", table.Name) + sentry.CurrentHub().CaptureMessage(validationErr.MaskedError()) + }) + } + return nil + } + } + + for _, c := range table.Columns { + p.resolveColumn(ctx, logger, tableMetrics, client, resource, c) + } + + if table.PostResourceResolver != nil { + if err := table.PostResourceResolver(ctx, client, resource); err != nil { + logger.Error().Stack().Err(err).Msg("post resource resolver finished with error") + atomic.AddUint64(&tableMetrics.Errors, 1) + if errors.As(err, &validationErr) { + sentry.WithScope(func(scope *sentry.Scope) { + scope.SetTag("table", table.Name) + sentry.CurrentHub().CaptureMessage(validationErr.MaskedError()) + }) + } + } + } + atomic.AddUint64(&tableMetrics.Resources, 1) + return resource +} + +func (p *Plugin) resolveColumn(ctx context.Context, logger zerolog.Logger, tableMetrics *TableClientMetrics, client schema.ClientMeta, resource *schema.Resource, c schema.Column) { + var validationErr *schema.ValidationError + columnStartTime := time.Now() + defer func() { + if err := recover(); err != nil { + stack := fmt.Sprintf("%s\n%s", err, string(debug.Stack())) + logger.Error().Str("column", c.Name).Interface("error", err).TimeDiff("duration", time.Now(), columnStartTime).Str("stack", stack).Msg("column resolver finished with panic") + atomic.AddUint64(&tableMetrics.Panics, 1) + sentry.WithScope(func(scope *sentry.Scope) { + scope.SetTag("table", resource.Table.Name) + scope.SetTag("column", c.Name) + sentry.CurrentHub().CaptureMessage(stack) + }) + } + }() + + if c.Resolver != nil { + if err := c.Resolver(ctx, client, resource, c); err != nil { + logger.Error().Err(err).Msg("column resolver finished with error") + atomic.AddUint64(&tableMetrics.Errors, 1) + if errors.As(err, &validationErr) { + sentry.WithScope(func(scope *sentry.Scope) { + scope.SetTag("table", resource.Table.Name) + scope.SetTag("column", c.Name) + sentry.CurrentHub().CaptureMessage(validationErr.MaskedError()) + }) + } + } + } else { + // base use case: try to get column with CamelCase name + v := funk.Get(resource.GetItem(), p.caser.ToPascal(c.Name), funk.WithAllowZero()) + if v != nil { + err := resource.Set(c.Name, v) + if err != nil { + logger.Error().Err(err).Msg("column resolver finished with error") + atomic.AddUint64(&tableMetrics.Errors, 1) + if errors.As(err, &validationErr) { + sentry.WithScope(func(scope *sentry.Scope) { + scope.SetTag("table", resource.Table.Name) + scope.SetTag("column", c.Name) + sentry.CurrentHub().CaptureMessage(validationErr.MaskedError()) + }) + } + } + } + } +} + +func (p *Plugin) periodicMetricLogger(ctx context.Context, wg *sync.WaitGroup) { + defer wg.Done() + + ticker := time.NewTicker(periodicMetricLoggerInterval) + defer ticker.Stop() + + for { + select { + case <-ctx.Done(): + return + case <-ticker.C: + p.logger.Info(). + Uint64("total_resources", p.metrics.TotalResourcesAtomic()). + Uint64("total_errors", p.metrics.TotalErrorsAtomic()). + Uint64("total_panics", p.metrics.TotalPanicsAtomic()). + Msg("Sync in progress") + } + } +} + +// unparam's suggestion to remove the second parameter is not good advice here. +// nolint:unparam +func max(a, b uint64) uint64 { + if a > b { + return a + } + return b +} diff --git a/plugin/scheduler_dfs.go b/plugin/scheduler_dfs.go new file mode 100644 index 0000000000..9390966395 --- /dev/null +++ b/plugin/scheduler_dfs.go @@ -0,0 +1,230 @@ +package plugin + +import ( + "context" + "errors" + "fmt" + "runtime/debug" + "sync" + "sync/atomic" + + pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v0" + "github.com/cloudquery/plugin-sdk/v3/helpers" + "github.com/cloudquery/plugin-sdk/v3/schema" + "github.com/getsentry/sentry-go" + "golang.org/x/sync/semaphore" +) + +func (p *Plugin) syncDfs(ctx context.Context, spec pbPlugin.SyncSpec, client Client, tables schema.Tables, resolvedResources chan<- *schema.Resource) { + // This is very similar to the concurrent web crawler problem with some minor changes. + // We are using DFS to make sure memory usage is capped at O(h) where h is the height of the tree. + tableConcurrency := max(spec.Concurrency/minResourceConcurrency, minTableConcurrency) + resourceConcurrency := tableConcurrency * minResourceConcurrency + + p.tableSems = make([]*semaphore.Weighted, p.maxDepth) + for i := uint64(0); i < p.maxDepth; i++ { + p.tableSems[i] = semaphore.NewWeighted(int64(tableConcurrency)) + // reduce table concurrency logarithmically for every depth level + tableConcurrency = max(tableConcurrency/2, minTableConcurrency) + } + p.resourceSem = semaphore.NewWeighted(int64(resourceConcurrency)) + + // we have this because plugins can return sometimes clients in a random way which will cause + // differences between this run and the next one. + preInitialisedClients := make([][]schema.ClientMeta, len(tables)) + for i, table := range tables { + clients := []schema.ClientMeta{client.(schema.ClientMeta)} + if table.Multiplex != nil { + clients = table.Multiplex(client.(schema.ClientMeta)) + } + // Detect duplicate clients while multiplexing + seenClients := make(map[string]bool) + for _, c := range clients { + if _, ok := seenClients[c.ID()]; !ok { + seenClients[c.ID()] = true + } else { + sentry.WithScope(func(scope *sentry.Scope) { + scope.SetTag("table", table.Name) + sentry.CurrentHub().CaptureMessage("duplicate client ID in " + table.Name) + }) + p.logger.Warn().Str("client", c.ID()).Str("table", table.Name).Msg("multiplex returned duplicate client") + } + } + preInitialisedClients[i] = clients + // we do this here to avoid locks so we initial the metrics structure once in the main goroutines + // and then we can just read from it in the other goroutines concurrently given we are not writing to it. + p.metrics.initWithClients(table, clients) + } + + // We start a goroutine that logs the metrics periodically. + // It needs its own waitgroup + var logWg sync.WaitGroup + logWg.Add(1) + + logCtx, logCancel := context.WithCancel(ctx) + go p.periodicMetricLogger(logCtx, &logWg) + + var wg sync.WaitGroup + for i, table := range tables { + table := table + clients := preInitialisedClients[i] + for _, client := range clients { + client := client + if err := p.tableSems[0].Acquire(ctx, 1); err != nil { + // This means context was cancelled + wg.Wait() + // gracefully shut down the logger goroutine + logCancel() + logWg.Wait() + return + } + wg.Add(1) + go func() { + defer wg.Done() + defer p.tableSems[0].Release(1) + // not checking for error here as nothing much todo. + // the error is logged and this happens when context is cancelled + p.resolveTableDfs(ctx, table, client, nil, resolvedResources, 1) + }() + } + } + + // Wait for all the worker goroutines to finish + wg.Wait() + + // gracefully shut down the logger goroutine + logCancel() + logWg.Wait() +} + +func (p *Plugin) resolveTableDfs(ctx context.Context, table *schema.Table, client schema.ClientMeta, parent *schema.Resource, resolvedResources chan<- *schema.Resource, depth int) { + var validationErr *schema.ValidationError + clientName := client.ID() + logger := p.logger.With().Str("table", table.Name).Str("client", clientName).Logger() + + if parent == nil { // Log only for root tables, otherwise we spam too much. + logger.Info().Msg("top level table resolver started") + } + tableMetrics := p.metrics.TableClient[table.Name][clientName] + + res := make(chan any) + go func() { + defer func() { + if err := recover(); err != nil { + stack := fmt.Sprintf("%s\n%s", err, string(debug.Stack())) + sentry.WithScope(func(scope *sentry.Scope) { + scope.SetTag("table", table.Name) + sentry.CurrentHub().CaptureMessage(stack) + }) + logger.Error().Interface("error", err).Str("stack", stack).Msg("table resolver finished with panic") + atomic.AddUint64(&tableMetrics.Panics, 1) + } + close(res) + }() + if err := table.Resolver(ctx, client, parent, res); err != nil { + logger.Error().Err(err).Msg("table resolver finished with error") + atomic.AddUint64(&tableMetrics.Errors, 1) + if errors.As(err, &validationErr) { + sentry.WithScope(func(scope *sentry.Scope) { + scope.SetTag("table", table.Name) + sentry.CurrentHub().CaptureMessage(validationErr.MaskedError()) + }) + } + return + } + }() + + for r := range res { + p.resolveResourcesDfs(ctx, table, client, parent, r, resolvedResources, depth) + } + + // we don't need any waitgroups here because we are waiting for the channel to close + if parent == nil { // Log only for root tables and relations only after resolving is done, otherwise we spam per object instead of per table. + logger.Info().Uint64("resources", tableMetrics.Resources).Uint64("errors", tableMetrics.Errors).Msg("table sync finished") + p.logTablesMetrics(table.Relations, client) + } +} + +func (p *Plugin) resolveResourcesDfs(ctx context.Context, table *schema.Table, client schema.ClientMeta, parent *schema.Resource, resources any, resolvedResources chan<- *schema.Resource, depth int) { + resourcesSlice := helpers.InterfaceSlice(resources) + if len(resourcesSlice) == 0 { + return + } + resourcesChan := make(chan *schema.Resource, len(resourcesSlice)) + go func() { + defer close(resourcesChan) + var wg sync.WaitGroup + sentValidationErrors := sync.Map{} + for i := range resourcesSlice { + i := i + if err := p.resourceSem.Acquire(ctx, 1); err != nil { + p.logger.Warn().Err(err).Msg("failed to acquire semaphore. context cancelled") + wg.Wait() + // we have to continue emptying the channel to exit gracefully + return + } + wg.Add(1) + go func() { + defer p.resourceSem.Release(1) + defer wg.Done() + //nolint:all + resolvedResource := p.resolveResource(ctx, table, client, parent, resourcesSlice[i]) + if resolvedResource == nil { + return + } + + if err := resolvedResource.CalculateCQID(p.spec.SyncSpec.DetrministicCqId); err != nil { + tableMetrics := p.metrics.TableClient[table.Name][client.ID()] + p.logger.Error().Err(err).Str("table", table.Name).Str("client", client.ID()).Msg("resource resolver finished with primary key calculation error") + if _, found := sentValidationErrors.LoadOrStore(table.Name, struct{}{}); !found { + // send resource validation errors to Sentry only once per table, + // to avoid sending too many duplicate messages + sentry.WithScope(func(scope *sentry.Scope) { + scope.SetTag("table", table.Name) + sentry.CurrentHub().CaptureMessage(err.Error()) + }) + } + atomic.AddUint64(&tableMetrics.Errors, 1) + return + } + if err := resolvedResource.Validate(); err != nil { + tableMetrics := p.metrics.TableClient[table.Name][client.ID()] + p.logger.Error().Err(err).Str("table", table.Name).Str("client", client.ID()).Msg("resource resolver finished with validation error") + if _, found := sentValidationErrors.LoadOrStore(table.Name, struct{}{}); !found { + // send resource validation errors to Sentry only once per table, + // to avoid sending too many duplicate messages + sentry.WithScope(func(scope *sentry.Scope) { + scope.SetTag("table", table.Name) + sentry.CurrentHub().CaptureMessage(err.Error()) + }) + } + atomic.AddUint64(&tableMetrics.Errors, 1) + return + } + resourcesChan <- resolvedResource + }() + } + wg.Wait() + }() + + var wg sync.WaitGroup + for resource := range resourcesChan { + resource := resource + resolvedResources <- resource + for _, relation := range resource.Table.Relations { + relation := relation + if err := p.tableSems[depth].Acquire(ctx, 1); err != nil { + // This means context was cancelled + wg.Wait() + return + } + wg.Add(1) + go func() { + defer wg.Done() + defer p.tableSems[depth].Release(1) + p.resolveTableDfs(ctx, relation, client, resource, resolvedResources, depth+1) + }() + } + } + wg.Wait() +} diff --git a/plugin/scheduler_round_robin.go b/plugin/scheduler_round_robin.go new file mode 100644 index 0000000000..0554f5489e --- /dev/null +++ b/plugin/scheduler_round_robin.go @@ -0,0 +1,104 @@ +package plugin + +import ( + "context" + "sync" + + pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v0" + "github.com/cloudquery/plugin-sdk/v3/schema" + "golang.org/x/sync/semaphore" +) + +type tableClient struct { + table *schema.Table + client schema.ClientMeta +} + +func (p *Plugin) syncRoundRobin(ctx context.Context, spec pbPlugin.SyncSpec, client Client, tables schema.Tables, resolvedResources chan<- *schema.Resource) { + tableConcurrency := max(spec.Concurrency/minResourceConcurrency, minTableConcurrency) + resourceConcurrency := tableConcurrency * minResourceConcurrency + + p.tableSems = make([]*semaphore.Weighted, p.maxDepth) + for i := uint64(0); i < p.maxDepth; i++ { + p.tableSems[i] = semaphore.NewWeighted(int64(tableConcurrency)) + // reduce table concurrency logarithmically for every depth level + tableConcurrency = max(tableConcurrency/2, minTableConcurrency) + } + p.resourceSem = semaphore.NewWeighted(int64(resourceConcurrency)) + + // we have this because plugins can return sometimes clients in a random way which will cause + // differences between this run and the next one. + preInitialisedClients := make([][]schema.ClientMeta, len(tables)) + for i, table := range tables { + clients := []schema.ClientMeta{client.(schema.ClientMeta)} + if table.Multiplex != nil { + clients = table.Multiplex(client.(schema.ClientMeta)) + } + preInitialisedClients[i] = clients + // we do this here to avoid locks so we initial the metrics structure once in the main goroutines + // and then we can just read from it in the other goroutines concurrently given we are not writing to it. + p.metrics.initWithClients(table, clients) + } + + // We start a goroutine that logs the metrics periodically. + // It needs its own waitgroup + var logWg sync.WaitGroup + logWg.Add(1) + + logCtx, logCancel := context.WithCancel(ctx) + go p.periodicMetricLogger(logCtx, &logWg) + + tableClients := roundRobinInterleave(tables, preInitialisedClients) + + var wg sync.WaitGroup + for _, tc := range tableClients { + table := tc.table + cl := tc.client + if err := p.tableSems[0].Acquire(ctx, 1); err != nil { + // This means context was cancelled + wg.Wait() + // gracefully shut down the logger goroutine + logCancel() + logWg.Wait() + return + } + wg.Add(1) + go func() { + defer wg.Done() + defer p.tableSems[0].Release(1) + // not checking for error here as nothing much to do. + // the error is logged and this happens when context is cancelled + // Round Robin currently uses the DFS algorithm to resolve the tables, but this + // may change in the future. + p.resolveTableDfs(ctx, table, cl, nil, resolvedResources, 1) + }() + } + + // Wait for all the worker goroutines to finish + wg.Wait() + + // gracefully shut down the logger goroutine + logCancel() + logWg.Wait() +} + +// interleave table-clients so that we get: +// table1-client1, table2-client1, table3-client1, table1-client2, table2-client2, table3-client2, ... +func roundRobinInterleave(tables schema.Tables, preInitialisedClients [][]schema.ClientMeta) []tableClient { + tableClients := make([]tableClient, 0) + c := 0 + for { + addedNew := false + for i, table := range tables { + if c < len(preInitialisedClients[i]) { + tableClients = append(tableClients, tableClient{table: table, client: preInitialisedClients[i][c]}) + addedNew = true + } + } + c++ + if !addedNew { + break + } + } + return tableClients +} diff --git a/plugin/scheduler_round_robin_test.go b/plugin/scheduler_round_robin_test.go new file mode 100644 index 0000000000..daf7cc242f --- /dev/null +++ b/plugin/scheduler_round_robin_test.go @@ -0,0 +1,65 @@ +package plugin + +import ( + "testing" + + "github.com/cloudquery/plugin-sdk/v3/schema" +) + +func TestRoundRobinInterleave(t *testing.T) { + table1 := &schema.Table{Name: "test_table"} + table2 := &schema.Table{Name: "test_table2"} + client1 := &testExecutionClient{} + client2 := &testExecutionClient{} + client3 := &testExecutionClient{} + cases := []struct { + name string + tables schema.Tables + preInitialisedClients [][]schema.ClientMeta + want []tableClient + }{ + { + name: "single table", + tables: schema.Tables{table1}, + preInitialisedClients: [][]schema.ClientMeta{{client1}}, + want: []tableClient{{table: table1, client: client1}}, + }, + { + name: "two tables with different clients", + tables: schema.Tables{table1, table2}, + preInitialisedClients: [][]schema.ClientMeta{{client1}, {client1, client2}}, + want: []tableClient{ + {table: table1, client: client1}, + {table: table2, client: client1}, + {table: table2, client: client2}, + }, + }, + { + name: "two tables with different clients", + tables: schema.Tables{table1, table2}, + preInitialisedClients: [][]schema.ClientMeta{{client1, client3}, {client1, client2}}, + want: []tableClient{ + {table: table1, client: client1}, + {table: table2, client: client1}, + {table: table1, client: client3}, + {table: table2, client: client2}, + }, + }, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + got := roundRobinInterleave(tc.tables, tc.preInitialisedClients) + if len(got) != len(tc.want) { + t.Fatalf("got %d tableClients, want %d", len(got), len(tc.want)) + } + for i := range got { + if got[i].table != tc.want[i].table { + t.Errorf("got table %v, want %v", got[i].table, tc.want[i].table) + } + if got[i].client != tc.want[i].client { + t.Errorf("got client %v, want %v", got[i].client, tc.want[i].client) + } + } + }) + } +} diff --git a/plugin/templates/all_tables.md.go.tpl b/plugin/templates/all_tables.md.go.tpl new file mode 100644 index 0000000000..008afb66fd --- /dev/null +++ b/plugin/templates/all_tables.md.go.tpl @@ -0,0 +1,5 @@ +# Source Plugin: {{.PluginName}} +## Tables +{{- range $table := $.Tables }} +{{- template "all_tables_entry.md.go.tpl" $table}} +{{- end }} \ No newline at end of file diff --git a/plugin/templates/all_tables_entry.md.go.tpl b/plugin/templates/all_tables_entry.md.go.tpl new file mode 100644 index 0000000000..6166b1983b --- /dev/null +++ b/plugin/templates/all_tables_entry.md.go.tpl @@ -0,0 +1,5 @@ + +{{. | indentToDepth}}- [{{.Name}}]({{.Name}}.md){{ if .IsIncremental}} (Incremental){{ end }} +{{- range $index, $rel := .Relations}} +{{- template "all_tables_entry.md.go.tpl" $rel}} +{{- end}} \ No newline at end of file diff --git a/plugin/templates/table.md.go.tpl b/plugin/templates/table.md.go.tpl new file mode 100644 index 0000000000..21a8ed135e --- /dev/null +++ b/plugin/templates/table.md.go.tpl @@ -0,0 +1,44 @@ +# Table: {{$.Name}} + +This table shows data for {{.|title}}. + +{{ $.Description }} +{{ $length := len $.PrimaryKeys -}} +{{ if eq $length 1 }} +The primary key for this table is **{{ index $.PrimaryKeys 0 }}**. +{{ else }} +The composite primary key for this table is ({{ range $index, $pk := $.PrimaryKeys -}} + {{if $index }}, {{end -}} + **{{$pk}}** + {{- end -}}). +{{ end }} +{{- if $.IsIncremental -}} +It supports incremental syncs +{{- $ikLength := len $.IncrementalKeys -}} +{{- if eq $ikLength 1 }} based on the **{{ index $.IncrementalKeys 0 }}** column +{{- else if gt $ikLength 1 }} based on the ({{ range $index, $pk := $.IncrementalKeys -}} + {{- if $index -}}, {{end -}} + **{{$pk}}** + {{- end -}}) columns +{{- end -}}. +{{- end -}} + +{{- if or ($.Relations) ($.Parent) }} +## Relations +{{- end }} +{{- if $.Parent }} +This table depends on [{{ $.Parent.Name }}]({{ $.Parent.Name }}.md). +{{- end}} +{{ if $.Relations }} +The following tables depend on {{.Name}}: +{{- range $rel := $.Relations }} + - [{{ $rel.Name }}]({{ $rel.Name }}.md) +{{- end }} +{{- end }} + +## Columns +| Name | Type | +| ------------- | ------------- | +{{- range $column := $.Columns }} +|{{$column.Name}}{{if $column.PrimaryKey}} (PK){{end}}{{if $column.IncrementalKey}} (Incremental Key){{end}}|{{$column.Type}}| +{{- end }} \ No newline at end of file diff --git a/plugin/testdata/TestGeneratePluginDocs-JSON-__tables.json b/plugin/testdata/TestGeneratePluginDocs-JSON-__tables.json new file mode 100644 index 0000000000..7a8280833e --- /dev/null +++ b/plugin/testdata/TestGeneratePluginDocs-JSON-__tables.json @@ -0,0 +1,214 @@ +[ + { + "name": "incremental_table", + "title": "Incremental Table", + "description": "Description for incremental table", + "columns": [ + { + "name": "_cq_source_name", + "type": "utf8" + }, + { + "name": "_cq_sync_time", + "type": "timestamp[us, tz=UTC]" + }, + { + "name": "_cq_id", + "type": "uuid" + }, + { + "name": "_cq_parent_id", + "type": "uuid" + }, + { + "name": "int_col", + "type": "int64" + }, + { + "name": "id_col", + "type": "int64", + "is_primary_key": true, + "is_incremental_key": true + }, + { + "name": "id_col2", + "type": "int64", + "is_incremental_key": true + } + ], + "relations": [] + }, + { + "name": "test_table", + "title": "Test Table", + "description": "Description for test table", + "columns": [ + { + "name": "_cq_source_name", + "type": "utf8" + }, + { + "name": "_cq_sync_time", + "type": "timestamp[us, tz=UTC]" + }, + { + "name": "_cq_id", + "type": "uuid" + }, + { + "name": "_cq_parent_id", + "type": "uuid" + }, + { + "name": "int_col", + "type": "int64" + }, + { + "name": "id_col", + "type": "int64", + "is_primary_key": true + }, + { + "name": "id_col2", + "type": "int64", + "is_primary_key": true + }, + { + "name": "json_col", + "type": "json" + }, + { + "name": "list_col", + "type": "list" + }, + { + "name": "map_col", + "type": "map" + }, + { + "name": "struct_col", + "type": "struct" + } + ], + "relations": [ + { + "name": "relation_table", + "title": "Relation Table", + "description": "Description for relational table", + "columns": [ + { + "name": "_cq_source_name", + "type": "utf8" + }, + { + "name": "_cq_sync_time", + "type": "timestamp[us, tz=UTC]" + }, + { + "name": "_cq_id", + "type": "uuid", + "is_primary_key": true + }, + { + "name": "_cq_parent_id", + "type": "uuid" + }, + { + "name": "string_col", + "type": "utf8" + } + ], + "relations": [ + { + "name": "relation_relation_table_a", + "title": "Relation Relation Table A", + "description": "Description for relational table's relation", + "columns": [ + { + "name": "_cq_source_name", + "type": "utf8" + }, + { + "name": "_cq_sync_time", + "type": "timestamp[us, tz=UTC]" + }, + { + "name": "_cq_id", + "type": "uuid", + "is_primary_key": true + }, + { + "name": "_cq_parent_id", + "type": "uuid" + }, + { + "name": "string_col", + "type": "utf8" + } + ], + "relations": [] + }, + { + "name": "relation_relation_table_b", + "title": "Relation Relation Table B", + "description": "Description for relational table's relation", + "columns": [ + { + "name": "_cq_source_name", + "type": "utf8" + }, + { + "name": "_cq_sync_time", + "type": "timestamp[us, tz=UTC]" + }, + { + "name": "_cq_id", + "type": "uuid", + "is_primary_key": true + }, + { + "name": "_cq_parent_id", + "type": "uuid" + }, + { + "name": "string_col", + "type": "utf8" + } + ], + "relations": [] + } + ] + }, + { + "name": "relation_table2", + "title": "Relation Table2", + "description": "Description for second relational table", + "columns": [ + { + "name": "_cq_source_name", + "type": "utf8" + }, + { + "name": "_cq_sync_time", + "type": "timestamp[us, tz=UTC]" + }, + { + "name": "_cq_id", + "type": "uuid", + "is_primary_key": true + }, + { + "name": "_cq_parent_id", + "type": "uuid" + }, + { + "name": "string_col", + "type": "utf8" + } + ], + "relations": [] + } + ] + } +] + diff --git a/plugin/testdata/TestGeneratePluginDocs-Markdown-README.md b/plugin/testdata/TestGeneratePluginDocs-Markdown-README.md new file mode 100644 index 0000000000..9480a0598a --- /dev/null +++ b/plugin/testdata/TestGeneratePluginDocs-Markdown-README.md @@ -0,0 +1,10 @@ +# Source Plugin: test + +## Tables + +- [incremental_table](incremental_table.md) (Incremental) +- [test_table](test_table.md) + - [relation_table](relation_table.md) + - [relation_relation_table_a](relation_relation_table_a.md) + - [relation_relation_table_b](relation_relation_table_b.md) + - [relation_table2](relation_table2.md) diff --git a/plugin/testdata/TestGeneratePluginDocs-Markdown-incremental_table.md b/plugin/testdata/TestGeneratePluginDocs-Markdown-incremental_table.md new file mode 100644 index 0000000000..67ca4b8539 --- /dev/null +++ b/plugin/testdata/TestGeneratePluginDocs-Markdown-incremental_table.md @@ -0,0 +1,20 @@ +# Table: incremental_table + +This table shows data for Incremental Table. + +Description for incremental table + +The primary key for this table is **id_col**. +It supports incremental syncs based on the (**id_col**, **id_col2**) columns. + +## Columns + +| Name | Type | +| ------------- | ------------- | +|_cq_source_name|utf8| +|_cq_sync_time|timestamp[us, tz=UTC]| +|_cq_id|uuid| +|_cq_parent_id|uuid| +|int_col|int64| +|id_col (PK) (Incremental Key)|int64| +|id_col2 (Incremental Key)|int64| diff --git a/plugin/testdata/TestGeneratePluginDocs-Markdown-relation_relation_table_a.md b/plugin/testdata/TestGeneratePluginDocs-Markdown-relation_relation_table_a.md new file mode 100644 index 0000000000..038791b13e --- /dev/null +++ b/plugin/testdata/TestGeneratePluginDocs-Markdown-relation_relation_table_a.md @@ -0,0 +1,21 @@ +# Table: relation_relation_table_a + +This table shows data for Relation Relation Table A. + +Description for relational table's relation + +The primary key for this table is **_cq_id**. + +## Relations + +This table depends on [relation_table](relation_table.md). + +## Columns + +| Name | Type | +| ------------- | ------------- | +|_cq_source_name|utf8| +|_cq_sync_time|timestamp[us, tz=UTC]| +|_cq_id (PK)|uuid| +|_cq_parent_id|uuid| +|string_col|utf8| diff --git a/plugin/testdata/TestGeneratePluginDocs-Markdown-relation_relation_table_b.md b/plugin/testdata/TestGeneratePluginDocs-Markdown-relation_relation_table_b.md new file mode 100644 index 0000000000..432f6533f8 --- /dev/null +++ b/plugin/testdata/TestGeneratePluginDocs-Markdown-relation_relation_table_b.md @@ -0,0 +1,21 @@ +# Table: relation_relation_table_b + +This table shows data for Relation Relation Table B. + +Description for relational table's relation + +The primary key for this table is **_cq_id**. + +## Relations + +This table depends on [relation_table](relation_table.md). + +## Columns + +| Name | Type | +| ------------- | ------------- | +|_cq_source_name|utf8| +|_cq_sync_time|timestamp[us, tz=UTC]| +|_cq_id (PK)|uuid| +|_cq_parent_id|uuid| +|string_col|utf8| diff --git a/plugin/testdata/TestGeneratePluginDocs-Markdown-relation_table.md b/plugin/testdata/TestGeneratePluginDocs-Markdown-relation_table.md new file mode 100644 index 0000000000..7db8baff7e --- /dev/null +++ b/plugin/testdata/TestGeneratePluginDocs-Markdown-relation_table.md @@ -0,0 +1,25 @@ +# Table: relation_table + +This table shows data for Relation Table. + +Description for relational table + +The primary key for this table is **_cq_id**. + +## Relations + +This table depends on [test_table](test_table.md). + +The following tables depend on relation_table: + - [relation_relation_table_a](relation_relation_table_a.md) + - [relation_relation_table_b](relation_relation_table_b.md) + +## Columns + +| Name | Type | +| ------------- | ------------- | +|_cq_source_name|utf8| +|_cq_sync_time|timestamp[us, tz=UTC]| +|_cq_id (PK)|uuid| +|_cq_parent_id|uuid| +|string_col|utf8| diff --git a/plugin/testdata/TestGeneratePluginDocs-Markdown-test_table.md b/plugin/testdata/TestGeneratePluginDocs-Markdown-test_table.md new file mode 100644 index 0000000000..f0c91578a5 --- /dev/null +++ b/plugin/testdata/TestGeneratePluginDocs-Markdown-test_table.md @@ -0,0 +1,29 @@ +# Table: test_table + +This table shows data for Test Table. + +Description for test table + +The composite primary key for this table is (**id_col**, **id_col2**). + +## Relations + +The following tables depend on test_table: + - [relation_table](relation_table.md) + - [relation_table2](relation_table2.md) + +## Columns + +| Name | Type | +| ------------- | ------------- | +|_cq_source_name|utf8| +|_cq_sync_time|timestamp[us, tz=UTC]| +|_cq_id|uuid| +|_cq_parent_id|uuid| +|int_col|int64| +|id_col (PK)|int64| +|id_col2 (PK)|int64| +|json_col|json| +|list_col|list| +|map_col|map| +|struct_col|struct| diff --git a/plugin/testing.go b/plugin/testing.go new file mode 100644 index 0000000000..562da87461 --- /dev/null +++ b/plugin/testing.go @@ -0,0 +1,141 @@ +package plugin + +import ( + "context" + "testing" + "time" + + pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v0" + "github.com/cloudquery/plugin-sdk/v3/schema" +) + +type Validator func(t *testing.T, plugin *Plugin, resources []*schema.Resource) + +func TestPluginSync(t *testing.T, plugin *Plugin, spec pbPlugin.Spec, opts ...TestPluginOption) { + t.Helper() + + o := &testPluginOptions{ + parallel: true, + validators: []Validator{validatePlugin}, + } + for _, opt := range opts { + opt(o) + } + if o.parallel { + t.Parallel() + } + + resourcesChannel := make(chan *schema.Resource) + var syncErr error + + if err := plugin.Init(context.Background(), spec); err != nil { + t.Fatal(err) + } + + go func() { + defer close(resourcesChannel) + syncErr = plugin.Sync(context.Background(), time.Now(), *spec.SyncSpec, resourcesChannel) + }() + + syncedResources := make([]*schema.Resource, 0) + for resource := range resourcesChannel { + syncedResources = append(syncedResources, resource) + } + if syncErr != nil { + t.Fatal(syncErr) + } + for _, validator := range o.validators { + validator(t, plugin, syncedResources) + } +} + +type TestPluginOption func(*testPluginOptions) + +func WithTestPluginNoParallel() TestPluginOption { + return func(f *testPluginOptions) { + f.parallel = false + } +} + +func WithTestPluginAdditionalValidators(v Validator) TestPluginOption { + return func(f *testPluginOptions) { + f.validators = append(f.validators, v) + } +} + +type testPluginOptions struct { + parallel bool + validators []Validator +} + +func getTableResources(t *testing.T, table *schema.Table, resources []*schema.Resource) []*schema.Resource { + t.Helper() + + tableResources := make([]*schema.Resource, 0) + + for _, resource := range resources { + if resource.Table.Name == table.Name { + tableResources = append(tableResources, resource) + } + } + + return tableResources +} + +func validateTable(t *testing.T, table *schema.Table, resources []*schema.Resource) { + t.Helper() + tableResources := getTableResources(t, table, resources) + if len(tableResources) == 0 { + t.Errorf("Expected table %s to be synced but it was not found", table.Name) + return + } + validateResources(t, tableResources) +} + +func validatePlugin(t *testing.T, plugin *Plugin, resources []*schema.Resource) { + t.Helper() + tables := extractTables(plugin.staticTables) + for _, table := range tables { + validateTable(t, table, resources) + } +} + +func extractTables(tables schema.Tables) []*schema.Table { + result := make([]*schema.Table, 0) + for _, table := range tables { + result = append(result, table) + result = append(result, extractTables(table.Relations)...) + } + return result +} + +// Validates that every column has at least one non-nil value. +// Also does some additional validations. +func validateResources(t *testing.T, resources []*schema.Resource) { + t.Helper() + + table := resources[0].Table + + // A set of column-names that have values in at least one of the resources. + columnsWithValues := make([]bool, len(table.Columns)) + + for _, resource := range resources { + for i, value := range resource.GetValues() { + if value == nil { + continue + } + if value.IsValid() { + columnsWithValues[i] = true + } + } + } + + // Make sure every column has at least one value. + for i, hasValue := range columnsWithValues { + col := table.Columns[i] + emptyExpected := col.Name == "_cq_parent_id" && table.Parent == nil + if !hasValue && !emptyExpected && !col.IgnoreInTests { + t.Errorf("table: %s column %s has no values", table.Name, table.Columns[i].Name) + } + } +} diff --git a/plugin/validate.go b/plugin/validate.go new file mode 100644 index 0000000000..0b21133b05 --- /dev/null +++ b/plugin/validate.go @@ -0,0 +1,27 @@ +package plugin + +import ( + "fmt" + + "github.com/cloudquery/plugin-sdk/v3/schema" +) + +func (p *Plugin) validate(tables schema.Tables) error { + if err := tables.ValidateDuplicateColumns(); err != nil { + return fmt.Errorf("found duplicate columns in source plugin: %s: %w", p.name, err) + } + + if err := tables.ValidateDuplicateTables(); err != nil { + return fmt.Errorf("found duplicate tables in source plugin: %s: %w", p.name, err) + } + + if err := tables.ValidateTableNames(); err != nil { + return fmt.Errorf("found table with invalid name in source plugin: %s: %w", p.name, err) + } + + if err := tables.ValidateColumnNames(); err != nil { + return fmt.Errorf("found column with invalid name in source plugin: %s: %w", p.name, err) + } + + return nil +} diff --git a/serve/plugin.go b/serve/plugin.go new file mode 100644 index 0000000000..b37be8513c --- /dev/null +++ b/serve/plugin.go @@ -0,0 +1,235 @@ +package serve + +import ( + "fmt" + "net" + "os" + "os/signal" + "strings" + "sync" + "syscall" + + "github.com/cloudquery/plugin-sdk/v3/plugin" + + pbdiscoveryv0 "github.com/cloudquery/plugin-pb-go/pb/discovery/v0" + pbv0 "github.com/cloudquery/plugin-pb-go/pb/plugin/v0" + discoveryServerV0 "github.com/cloudquery/plugin-sdk/v3/internal/servers/discovery/v0" + + serversv0 "github.com/cloudquery/plugin-sdk/v3/internal/servers/plugin/v0" + "github.com/getsentry/sentry-go" + grpczerolog "github.com/grpc-ecosystem/go-grpc-middleware/providers/zerolog/v2" + "github.com/grpc-ecosystem/go-grpc-middleware/v2/interceptors/logging" + "github.com/rs/zerolog" + "github.com/rs/zerolog/log" + "github.com/spf13/cobra" + "github.com/thoas/go-funk" + "golang.org/x/net/netutil" + "google.golang.org/grpc" + "google.golang.org/grpc/test/bufconn" +) + +type pluginServe struct { + plugin *plugin.Plugin + sentryDSN string +} + +type PluginOption func(*pluginServe) + +func WithPluginSentryDSN(dsn string) PluginOption { + return func(s *pluginServe) { + s.sentryDSN = dsn + } +} + +// lis used for unit testing grpc server and client +var testPluginListener *bufconn.Listener +var testPluginListenerLock sync.Mutex + +const servePluginShort = `Start plugin server` + +func Plugin(plugin *plugin.Plugin, opts ...PluginOption) { + s := &pluginServe{ + plugin: plugin, + } + for _, opt := range opts { + opt(s) + } + if err := newCmdPluginRoot(s).Execute(); err != nil { + sentry.CaptureMessage(err.Error()) + fmt.Println(err) + os.Exit(1) + } +} + +// nolint:dupl +func newCmdPluginServe(serve *pluginServe) *cobra.Command { + var address string + var network string + var noSentry bool + logLevel := newEnum([]string{"trace", "debug", "info", "warn", "error"}, "info") + logFormat := newEnum([]string{"text", "json"}, "text") + telemetryLevel := newEnum([]string{"none", "errors", "stats", "all"}, "all") + err := telemetryLevel.Set(getEnvOrDefault("CQ_TELEMETRY_LEVEL", telemetryLevel.Value)) + if err != nil { + fmt.Fprintf(os.Stderr, "failed to set telemetry level: "+err.Error()) + os.Exit(1) + } + + cmd := &cobra.Command{ + Use: "serve", + Short: serveSourceShort, + Long: serveSourceShort, + Args: cobra.NoArgs, + RunE: func(cmd *cobra.Command, args []string) error { + zerologLevel, err := zerolog.ParseLevel(logLevel.String()) + if err != nil { + return err + } + var logger zerolog.Logger + if logFormat.String() == "json" { + logger = zerolog.New(os.Stdout).Level(zerologLevel) + } else { + logger = log.Output(zerolog.ConsoleWriter{Out: os.Stdout}).Level(zerologLevel) + } + + // opts.Plugin.Logger = logger + var listener net.Listener + if network == "test" { + testSourceListenerLock.Lock() + listener = bufconn.Listen(testBufSize) + testSourceListener = listener.(*bufconn.Listener) + testSourceListenerLock.Unlock() + } else { + listener, err = net.Listen(network, address) + if err != nil { + return fmt.Errorf("failed to listen %s:%s: %w", network, address, err) + } + } + // source plugins can only accept one connection at a time + // unlike destination plugins that can accept multiple connections + limitListener := netutil.LimitListener(listener, 1) + // See logging pattern https://github.com/grpc-ecosystem/go-grpc-middleware/blob/v2/providers/zerolog/examples_test.go + s := grpc.NewServer( + grpc.ChainUnaryInterceptor( + logging.UnaryServerInterceptor(grpczerolog.InterceptorLogger(logger)), + ), + grpc.ChainStreamInterceptor( + logging.StreamServerInterceptor(grpczerolog.InterceptorLogger(logger)), + ), + grpc.MaxRecvMsgSize(MaxMsgSize), + grpc.MaxSendMsgSize(MaxMsgSize), + ) + serve.plugin.SetLogger(logger) + pbv0.RegisterPluginServer(s, &serversv0.Server{ + Plugin: serve.plugin, + Logger: logger, + }) + pbdiscoveryv0.RegisterDiscoveryServer(s, &discoveryServerV0.Server{ + Versions: []string{"v2"}, + }) + + version := serve.plugin.Version() + + if serve.sentryDSN != "" && !strings.EqualFold(version, "development") && !noSentry { + err = sentry.Init(sentry.ClientOptions{ + Dsn: serve.sentryDSN, + Debug: false, + AttachStacktrace: false, + Release: version, + Transport: sentry.NewHTTPSyncTransport(), + ServerName: "oss", // set to "oss" on purpose to avoid sending any identifying information + // https://docs.sentry.io/platforms/go/configuration/options/#removing-default-integrations + Integrations: func(integrations []sentry.Integration) []sentry.Integration { + var filteredIntegrations []sentry.Integration + for _, integration := range integrations { + if integration.Name() == "Modules" { + continue + } + filteredIntegrations = append(filteredIntegrations, integration) + } + return filteredIntegrations + }, + }) + if err != nil { + log.Error().Err(err).Msg("Error initializing sentry") + } + } + + ctx := cmd.Context() + c := make(chan os.Signal, 1) + signal.Notify(c, os.Interrupt, syscall.SIGTERM) + defer func() { + signal.Stop(c) + }() + + go func() { + select { + case sig := <-c: + logger.Info().Str("address", listener.Addr().String()).Str("signal", sig.String()).Msg("Got stop signal. Source plugin server shutting down") + s.Stop() + case <-ctx.Done(): + logger.Info().Str("address", listener.Addr().String()).Msg("Context cancelled. Source plugin server shutting down") + s.Stop() + } + }() + + logger.Info().Str("address", listener.Addr().String()).Msg("Source plugin server listening") + if err := s.Serve(limitListener); err != nil { + return fmt.Errorf("failed to serve: %w", err) + } + return nil + }, + } + cmd.Flags().StringVar(&address, "address", "localhost:7777", "address to serve on. can be tcp: `localhost:7777` or unix socket: `/tmp/plugin.rpc.sock`") + cmd.Flags().StringVar(&network, "network", "tcp", `the network must be "tcp", "tcp4", "tcp6", "unix" or "unixpacket"`) + cmd.Flags().Var(logLevel, "log-level", fmt.Sprintf("log level. one of: %s", strings.Join(logLevel.Allowed, ","))) + cmd.Flags().Var(logFormat, "log-format", fmt.Sprintf("log format. one of: %s", strings.Join(logFormat.Allowed, ","))) + cmd.Flags().BoolVar(&noSentry, "no-sentry", false, "disable sentry") + sendErrors := funk.ContainsString([]string{"all", "errors"}, telemetryLevel.String()) + if !sendErrors { + noSentry = true + } + + return cmd +} + +const ( + pluginDocShort = "Generate documentation for tables" + pluginDocLong = `Generate documentation for tables + +If format is markdown, a destination directory will be created (if necessary) containing markdown files. +Example: +doc ./output + +If format is JSON, a destination directory will be created (if necessary) with a single json file called __tables.json. +Example: +doc --format json . +` +) + +func newCmdPluginDoc(serve *pluginServe) *cobra.Command { + format := newEnum([]string{"json", "markdown"}, "markdown") + cmd := &cobra.Command{ + Use: "doc ", + Short: sourceDocShort, + Long: sourceDocLong, + Args: cobra.ExactArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + pbFormat := pbv0.GenDocs_FORMAT(pbv0.GenDocs_FORMAT_value[format.Value]) + return serve.plugin.GeneratePluginDocs(serve.plugin.StaticTables(), args[0], pbFormat) + }, + } + cmd.Flags().Var(format, "format", fmt.Sprintf("output format. one of: %s", strings.Join(format.Allowed, ","))) + return cmd +} + +func newCmdPluginRoot(serve *pluginServe) *cobra.Command { + cmd := &cobra.Command{ + Use: fmt.Sprintf("%s ", serve.plugin.Name()), + } + cmd.AddCommand(newCmdPluginServe(serve)) + cmd.AddCommand(newCmdPluginDoc(serve)) + cmd.CompletionOptions.DisableDefaultCmd = true + cmd.Version = serve.plugin.Version() + return cmd +} diff --git a/serve/plugin_test.go b/serve/plugin_test.go new file mode 100644 index 0000000000..8a541611e9 --- /dev/null +++ b/serve/plugin_test.go @@ -0,0 +1,238 @@ +package serve + +import ( + "bytes" + "context" + "encoding/json" + "io" + "net" + "sync" + "testing" + "time" + + "github.com/apache/arrow/go/v13/arrow" + "github.com/apache/arrow/go/v13/arrow/ipc" + pb "github.com/cloudquery/plugin-pb-go/pb/source/v2" + "github.com/cloudquery/plugin-pb-go/specs" + "github.com/cloudquery/plugin-sdk/v3/plugins/source" + "github.com/cloudquery/plugin-sdk/v3/schema" + "github.com/rs/zerolog" + "google.golang.org/grpc" + "google.golang.org/grpc/credentials/insecure" +) + +type TestSourcePluginSpec struct { + Accounts []string `json:"accounts,omitempty" yaml:"accounts,omitempty"` +} + +type testExecutionClient struct{} + +var _ schema.ClientMeta = &testExecutionClient{} + +// var errTestExecutionClientErr = fmt.Errorf("error in newTestExecutionClientErr") + +func testTable(name string) *schema.Table { + return &schema.Table{ + Name: name, + Resolver: func(ctx context.Context, meta schema.ClientMeta, parent *schema.Resource, res chan<- any) error { + res <- map[string]any{ + "TestColumn": 3, + } + return nil + }, + Columns: []schema.Column{ + { + Name: "test_column", + Type: arrow.PrimitiveTypes.Int64, + }, + }, + } +} + +func (*testExecutionClient) ID() string { + return "testExecutionClient" +} + +func newTestExecutionClient(context.Context, zerolog.Logger, specs.Source, source.Options) (schema.ClientMeta, error) { + return &testExecutionClient{}, nil +} + +func bufSourceDialer(context.Context, string) (net.Conn, error) { + testSourceListenerLock.Lock() + defer testSourceListenerLock.Unlock() + return testSourceListener.Dial() +} + +func TestSourceSuccess(t *testing.T) { + plugin := source.NewPlugin( + "testPlugin", + "v1.0.0", + []*schema.Table{testTable("test_table"), testTable("test_table2")}, + newTestExecutionClient) + + cmd := newCmdSourceRoot(&sourceServe{ + plugin: plugin, + }) + cmd.SetArgs([]string{"serve", "--network", "test"}) + ctx := context.Background() + ctx, cancel := context.WithCancel(ctx) + var wg sync.WaitGroup + wg.Add(1) + var serverErr error + go func() { + defer wg.Done() + serverErr = cmd.ExecuteContext(ctx) + }() + defer func() { + cancel() + wg.Wait() + }() + for { + testSourceListenerLock.Lock() + if testSourceListener != nil { + testSourceListenerLock.Unlock() + break + } + testSourceListenerLock.Unlock() + t.Log("waiting for grpc server to start") + time.Sleep(time.Millisecond * 200) + } + + // https://stackoverflow.com/questions/42102496/testing-a-grpc-service + conn, err := grpc.DialContext(ctx, "bufnet", grpc.WithContextDialer(bufSourceDialer), grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithBlock()) + if err != nil { + t.Fatalf("Failed to dial bufnet: %v", err) + } + c := pb.NewSourceClient(conn) + + getNameRes, err := c.GetName(ctx, &pb.GetName_Request{}) + if err != nil { + t.Fatal(err) + } + if getNameRes.Name != "testPlugin" { + t.Fatalf("expected name to be testPlugin but got %s", getNameRes.Name) + } + + getVersionResponse, err := c.GetVersion(ctx, &pb.GetVersion_Request{}) + if err != nil { + t.Fatal(err) + } + if getVersionResponse.Version != "v1.0.0" { + t.Fatalf("Expected version to be v1.0.0 but got %s", getVersionResponse.Version) + } + + spec := specs.Source{ + Name: "testSourcePlugin", + Version: "v1.0.0", + Path: "cloudquery/testSourcePlugin", + Registry: specs.RegistryGithub, + Tables: []string{"test_table"}, + Spec: TestSourcePluginSpec{Accounts: []string{"cloudquery/plugin-sdk"}}, + Destinations: []string{"test"}, + } + specMarshaled, err := json.Marshal(spec) + if err != nil { + t.Fatalf("Failed to marshal spec: %v", err) + } + + getTablesRes, err := c.GetTables(ctx, &pb.GetTables_Request{}) + if err != nil { + t.Fatal(err) + } + + tables, err := schema.NewTablesFromBytes(getTablesRes.Tables) + if err != nil { + t.Fatal(err) + } + + if len(tables) != 2 { + t.Fatalf("Expected 2 tables but got %d", len(tables)) + } + if _, err := c.Init(ctx, &pb.Init_Request{Spec: specMarshaled}); err != nil { + t.Fatal(err) + } + + getTablesForSpecRes, err := c.GetDynamicTables(ctx, &pb.GetDynamicTables_Request{}) + if err != nil { + t.Fatal(err) + } + tables, err = schema.NewTablesFromBytes(getTablesForSpecRes.Tables) + if err != nil { + t.Fatal(err) + } + + if len(tables) != 1 { + t.Fatalf("Expected 1 table but got %d", len(tables)) + } + + syncClient, err := c.Sync(ctx, &pb.Sync_Request{}) + if err != nil { + t.Fatal(err) + } + var resources []arrow.Record + for { + r, err := syncClient.Recv() + if err == io.EOF { + break + } + if err != nil { + t.Fatal(err) + } + rdr, err := ipc.NewReader(bytes.NewReader(r.Resource)) + if err != nil { + t.Fatal(err) + } + for rdr.Next() { + rec := rdr.Record() + rec.Retain() + resources = append(resources, rec) + } + } + + totalResources := 0 + for _, resource := range resources { + sc := resource.Schema() + tableName, ok := sc.Metadata().GetValue(schema.MetadataTableName) + if !ok { + t.Fatal("Expected table name metadata to be set") + } + if tableName != "test_table" { + t.Fatalf("Expected resource with table name test_table. got: %s", tableName) + } + if len(resource.Columns()) != 5 { + t.Fatalf("Expected resource with data length 3 but got %d", len(resource.Columns())) + } + totalResources++ + } + if totalResources != 1 { + t.Fatalf("Expected 1 resource on channel but got %d", totalResources) + } + + getMetricsRes, err := c.GetMetrics(ctx, &pb.GetMetrics_Request{}) + if err != nil { + t.Fatal(err) + } + var stats source.Metrics + if err := json.Unmarshal(getMetricsRes.Metrics, &stats); err != nil { + t.Fatal(err) + } + + clientStats := stats.TableClient[""][""] + if clientStats.Resources != 1 { + t.Fatalf("Expected 1 resource but got %d", clientStats.Resources) + } + + if clientStats.Errors != 0 { + t.Fatalf("Expected 0 errors but got %d", clientStats.Errors) + } + + if clientStats.Panics != 0 { + t.Fatalf("Expected 0 panics but got %d", clientStats.Panics) + } + + cancel() + wg.Wait() + if serverErr != nil { + t.Fatal(serverErr) + } +} From 1af9d0ebf0332d72615d5b8e9230214e22cc42ae Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Tue, 30 May 2023 20:53:37 +0300 Subject: [PATCH 052/125] wip --- go.mod | 7 +- go.sum | 2 + internal/memdb/memdb.go | 41 +- internal/memdb/memdb_test.go | 128 ++--- internal/pk/pk.go | 2 +- .../servers/destination/v0/destinations.go | 12 +- .../servers/destination/v0/schemav2tov3.go | 4 +- internal/servers/destination/v0/specv3tov1.go | 77 +++ .../servers/destination/v1/destinations.go | 12 +- internal/servers/destination/v1/specv3tov1.go | 77 +++ internal/servers/plugin/{v0 => v3}/plugin.go | 48 +- internal/servers/source/v2/source.go | 173 ------- ...hmark_test.go => benchmark_test.go.backup} | 2 +- {plugins/destination => plugin}/diff.go | 0 plugin/docs.go | 6 +- plugin/{docs_test.go => docs_test.go.backup} | 4 +- .../destination => plugin}/managed_writer.go | 15 +- plugin/metrics.go | 2 +- {plugins/destination => plugin}/nulls.go | 6 +- plugin/options.go | 33 +- plugin/plugin.go | 191 +++++-- .../plugin_managed_source_test.go | 142 +++--- plugin/plugin_round_robin_test.go | 102 +++- plugin/plugin_test.go | 470 ------------------ plugin/scheduler.go | 2 +- plugin/scheduler_dfs.go | 6 +- plugin/scheduler_round_robin.go | 4 +- plugin/scheduler_round_robin_test.go | 2 +- .../testing_overwrite_deletestale.go | 21 +- plugin/{testing.go => testing_sync.go} | 42 +- .../testing_write.go | 38 +- .../testing_write_append.go | 17 +- .../testing_write_migrate.go | 39 +- .../testing_write_overwrite.go | 19 +- plugin/validate.go | 2 +- plugins/destination/metrics.go | 8 - plugins/destination/plugin.go | 314 ------------ plugins/destination/unmanaged_writer.go | 14 - plugins/docs.go | 2 - plugins/source/benchmark_test.go | 429 ---------------- plugins/source/docs.go | 241 --------- plugins/source/docs_test.go | 164 ------ plugins/source/metrics.go | 207 -------- plugins/source/metrics_test.go | 186 ------- plugins/source/options.go | 39 -- plugins/source/plugin.go | 345 ------------- plugins/source/scheduler.go | 177 ------- plugins/source/scheduler_dfs.go | 234 --------- plugins/source/scheduler_round_robin.go | 104 ---- plugins/source/scheduler_round_robin_test.go | 65 --- plugins/source/templates/all_tables.md.go.tpl | 5 - .../templates/all_tables_entry.md.go.tpl | 5 - plugins/source/templates/table.md.go.tpl | 44 -- .../TestGeneratePluginDocs-JSON-__tables.json | 214 -------- .../TestGeneratePluginDocs-Markdown-README.md | 10 - ...tePluginDocs-Markdown-incremental_table.md | 20 - ...Docs-Markdown-relation_relation_table_a.md | 21 - ...Docs-Markdown-relation_relation_table_b.md | 21 - ...eratePluginDocs-Markdown-relation_table.md | 25 - ...tGeneratePluginDocs-Markdown-test_table.md | 29 -- plugins/source/testing.go | 141 ------ plugins/source/validate.go | 25 - scalar/inet.go | 2 +- scalar/json.go | 2 +- scalar/mac.go | 2 +- scalar/scalar.go | 12 +- scalar/uuid.go | 2 +- schema/meta.go | 4 +- schema/resource.go | 2 +- schema/table.go | 2 +- schema/testdata.go | 3 +- serve/destination.go | 209 -------- serve/destination_v0_test.go | 32 +- serve/destination_v1_test.go | 21 +- serve/plugin.go | 51 +- serve/plugin_test.go | 68 +-- serve/source.go | 233 --------- serve/source_v2_test.go | 238 --------- transformers/struct.go | 6 +- transformers/struct_test.go | 4 +- 80 files changed, 908 insertions(+), 4822 deletions(-) create mode 100644 internal/servers/destination/v0/specv3tov1.go create mode 100644 internal/servers/destination/v1/specv3tov1.go rename internal/servers/plugin/{v0 => v3}/plugin.go (87%) delete mode 100644 internal/servers/source/v2/source.go rename plugin/{benchmark_test.go => benchmark_test.go.backup} (99%) rename {plugins/destination => plugin}/diff.go (100%) rename plugin/{docs_test.go => docs_test.go.backup} (97%) rename {plugins/destination => plugin}/managed_writer.go (92%) rename {plugins/destination => plugin}/nulls.go (94%) rename plugins/source/plugin_test.go => plugin/plugin_managed_source_test.go (76%) delete mode 100644 plugin/plugin_test.go rename plugins/destination/plugin_testing_overwrite_delete_stale.go => plugin/testing_overwrite_deletestale.go (91%) rename plugin/{testing.go => testing_sync.go} (73%) rename plugins/destination/plugin_testing.go => plugin/testing_write.go (88%) rename plugins/destination/plugin_testing_write_append.go => plugin/testing_write_append.go (85%) rename plugins/destination/plugin_testing_migrate.go => plugin/testing_write_migrate.go (85%) rename plugins/destination/plugin_testing_overwrite.go => plugin/testing_write_overwrite.go (87%) delete mode 100644 plugins/destination/metrics.go delete mode 100644 plugins/destination/plugin.go delete mode 100644 plugins/destination/unmanaged_writer.go delete mode 100644 plugins/docs.go delete mode 100644 plugins/source/benchmark_test.go delete mode 100644 plugins/source/docs.go delete mode 100644 plugins/source/docs_test.go delete mode 100644 plugins/source/metrics.go delete mode 100644 plugins/source/metrics_test.go delete mode 100644 plugins/source/options.go delete mode 100644 plugins/source/plugin.go delete mode 100644 plugins/source/scheduler.go delete mode 100644 plugins/source/scheduler_dfs.go delete mode 100644 plugins/source/scheduler_round_robin.go delete mode 100644 plugins/source/scheduler_round_robin_test.go delete mode 100644 plugins/source/templates/all_tables.md.go.tpl delete mode 100644 plugins/source/templates/all_tables_entry.md.go.tpl delete mode 100644 plugins/source/templates/table.md.go.tpl delete mode 100644 plugins/source/testdata/TestGeneratePluginDocs-JSON-__tables.json delete mode 100644 plugins/source/testdata/TestGeneratePluginDocs-Markdown-README.md delete mode 100644 plugins/source/testdata/TestGeneratePluginDocs-Markdown-incremental_table.md delete mode 100644 plugins/source/testdata/TestGeneratePluginDocs-Markdown-relation_relation_table_a.md delete mode 100644 plugins/source/testdata/TestGeneratePluginDocs-Markdown-relation_relation_table_b.md delete mode 100644 plugins/source/testdata/TestGeneratePluginDocs-Markdown-relation_table.md delete mode 100644 plugins/source/testdata/TestGeneratePluginDocs-Markdown-test_table.md delete mode 100644 plugins/source/testing.go delete mode 100644 plugins/source/validate.go delete mode 100644 serve/destination.go delete mode 100644 serve/source.go delete mode 100644 serve/source_v2_test.go diff --git a/go.mod b/go.mod index fab4f9e1b3..5adf4d1599 100644 --- a/go.mod +++ b/go.mod @@ -1,9 +1,9 @@ -module github.com/cloudquery/plugin-sdk/v3 +module github.com/cloudquery/plugin-sdk/v4 go 1.19 require ( - github.com/apache/arrow/go/v13 v13.0.0-20230622042343-ec413b7763fe + github.com/apache/arrow/go/v13 v13.0.0-20230525142029-2d32efeedad8 github.com/bradleyjkemp/cupaloy/v2 v2.8.0 github.com/cloudquery/plugin-pb-go v1.2.0 github.com/cloudquery/plugin-sdk/v2 v2.7.0 @@ -31,6 +31,9 @@ replace github.com/apache/arrow/go/v13 => github.com/cloudquery/arrow/go/v13 v13 replace github.com/cloudquery/plugin-pb-go => ../plugin-pb-go require ( + github.com/andybalholm/brotli v1.0.5 // indirect + github.com/apache/thrift v0.16.0 // indirect + github.com/cloudquery/plugin-sdk/v3 v3.7.0 // indirect github.com/davecgh/go-spew v1.1.1 // indirect github.com/ghodss/yaml v1.0.0 // indirect github.com/golang/protobuf v1.5.3 // indirect diff --git a/go.sum b/go.sum index 8f7dfaf7d1..17a7a98de3 100644 --- a/go.sum +++ b/go.sum @@ -44,6 +44,8 @@ github.com/cloudquery/arrow/go/v13 v13.0.0-20230509053643-898a79b1d3c8 h1:CmgLSE github.com/cloudquery/arrow/go/v13 v13.0.0-20230509053643-898a79b1d3c8/go.mod h1:/XatdE3kDIBqZKhZ7OBUHwP2jaASDFZHqF4puOWM8po= github.com/cloudquery/plugin-sdk/v2 v2.7.0 h1:hRXsdEiaOxJtsn/wZMFQC9/jPfU1MeMK3KF+gPGqm7U= github.com/cloudquery/plugin-sdk/v2 v2.7.0/go.mod h1:pAX6ojIW99b/Vg4CkhnsGkRIzNaVEceYMR+Bdit73ug= +github.com/cloudquery/plugin-sdk/v3 v3.7.0 h1:aRazh17V+6AA00vmxPZRv2rudNEerSd3kqbyffRl6SA= +github.com/cloudquery/plugin-sdk/v3 v3.7.0/go.mod h1:z9Fny7SO8fNyVx6bOTM037lo7h3vJI+ZHUc/RMj20VU= github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= github.com/cncf/udpa/go v0.0.0-20201120205902-5459f2c99403/go.mod h1:WmhPx2Nbnhtbo57+VJT5O0JRkEi1Wbu0z5j0R8u5Hbk= github.com/coreos/go-systemd v0.0.0-20190321100706-95778dfbb74e/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= diff --git a/internal/memdb/memdb.go b/internal/memdb/memdb.go index 9c6bbb74d1..c84c32255e 100644 --- a/internal/memdb/memdb.go +++ b/internal/memdb/memdb.go @@ -10,15 +10,15 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/array" - "github.com/cloudquery/plugin-pb-go/specs" - "github.com/cloudquery/plugin-sdk/v3/plugins/destination" - "github.com/cloudquery/plugin-sdk/v3/schema" + pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" + "github.com/cloudquery/plugin-sdk/v4/plugin" + "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/rs/zerolog" ) // client is mostly used for testing the destination plugin. type client struct { - spec specs.Destination + spec pbPlugin.Spec memoryDB map[string][]arrow.Record tables map[string]*schema.Table memoryDBLock sync.RWMutex @@ -40,7 +40,7 @@ func WithBlockingWrite() Option { } } -func GetNewClient(options ...Option) destination.NewClientFunc { +func GetNewClient(options ...Option) plugin.NewClientFunc { c := &client{ memoryDB: make(map[string][]arrow.Record), memoryDBLock: sync.RWMutex{}, @@ -48,7 +48,7 @@ func GetNewClient(options ...Option) destination.NewClientFunc { for _, opt := range options { opt(c) } - return func(context.Context, zerolog.Logger, specs.Destination) (destination.Client, error) { + return func(context.Context, zerolog.Logger, pbPlugin.Spec) (plugin.Client, error) { return c, nil } } @@ -61,7 +61,7 @@ func getTestLogger(t *testing.T) zerolog.Logger { ).Level(zerolog.DebugLevel).With().Timestamp().Logger() } -func NewClient(_ context.Context, _ zerolog.Logger, spec specs.Destination) (destination.Client, error) { +func NewClient(_ context.Context, _ zerolog.Logger, spec pbPlugin.Spec) (plugin.Client, error) { return &client{ memoryDB: make(map[string][]arrow.Record), tables: make(map[string]*schema.Table), @@ -69,7 +69,7 @@ func NewClient(_ context.Context, _ zerolog.Logger, spec specs.Destination) (des }, nil } -func NewClientErrOnNew(context.Context, zerolog.Logger, specs.Destination) (destination.Client, error) { +func NewClientErrOnNew(context.Context, zerolog.Logger, pbPlugin.Spec) (plugin.Client, error) { return nil, fmt.Errorf("newTestDestinationMemDBClientErrOnNew") } @@ -94,6 +94,21 @@ func (c *client) overwrite(table *schema.Table, data arrow.Record) { c.memoryDB[tableName] = append(c.memoryDB[tableName], data) } +func (c *client) ID() string { + return "testDestinationMemDB" +} + +func (c *client) Sync(ctx context.Context, metrics *plugin.Metrics, res chan<- arrow.Record) error { + c.memoryDBLock.RLock() + for tableName := range c.memoryDB { + for _, row := range c.memoryDB[tableName] { + res <- row + } + } + c.memoryDBLock.RUnlock() + return nil +} + func (c *client) Migrate(_ context.Context, tables schema.Tables) error { for _, table := range tables { tableName := table.Name @@ -160,7 +175,7 @@ func (c *client) Write(ctx context.Context, _ schema.Tables, resources <-chan ar return fmt.Errorf("table name not found in schema metadata") } table := c.tables[tableName] - if c.spec.WriteMode == specs.WriteModeAppend { + if c.spec.WriteSpec.WriteMode == pbPlugin.WRITE_MODE_WRITE_MODE_APPEND { c.memoryDB[tableName] = append(c.memoryDB[tableName], resource) } else { c.overwrite(table, resource) @@ -184,7 +199,7 @@ func (c *client) WriteTableBatch(ctx context.Context, table *schema.Table, resou tableName := table.Name for _, resource := range resources { c.memoryDBLock.Lock() - if c.spec.WriteMode == specs.WriteModeAppend { + if c.spec.WriteSpec.WriteMode == pbPlugin.WRITE_MODE_WRITE_MODE_APPEND { c.memoryDB[tableName] = append(c.memoryDB[tableName], resource) } else { c.overwrite(table, resource) @@ -194,8 +209,8 @@ func (c *client) WriteTableBatch(ctx context.Context, table *schema.Table, resou return nil } -func (*client) Metrics() destination.Metrics { - return destination.Metrics{} +func (*client) Metrics() plugin.Metrics { + return plugin.Metrics{} } func (c *client) Close(context.Context) error { @@ -224,4 +239,4 @@ func (c *client) deleteStaleTable(_ context.Context, table *schema.Table, source } } c.memoryDB[tableName] = filteredTable -} +} \ No newline at end of file diff --git a/internal/memdb/memdb_test.go b/internal/memdb/memdb_test.go index 7f9e8a5759..b5196d45f3 100644 --- a/internal/memdb/memdb_test.go +++ b/internal/memdb/memdb_test.go @@ -6,38 +6,38 @@ import ( "time" "github.com/apache/arrow/go/v13/arrow" - "github.com/cloudquery/plugin-pb-go/specs" - "github.com/cloudquery/plugin-sdk/v3/plugins/destination" - "github.com/cloudquery/plugin-sdk/v3/schema" + pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" + "github.com/cloudquery/plugin-sdk/v4/plugin" + "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/google/uuid" "github.com/rs/zerolog" "github.com/stretchr/testify/require" ) -var migrateStrategyOverwrite = destination.MigrateStrategy{ - AddColumn: specs.MigrateModeForced, - AddColumnNotNull: specs.MigrateModeForced, - RemoveColumn: specs.MigrateModeForced, - RemoveColumnNotNull: specs.MigrateModeForced, - ChangeColumn: specs.MigrateModeForced, +var migrateStrategyOverwrite = plugin.MigrateStrategy{ + AddColumn: pbPlugin.WriteSpec_FORCE, + AddColumnNotNull: pbPlugin.WriteSpec_FORCE, + RemoveColumn: pbPlugin.WriteSpec_FORCE, + RemoveColumnNotNull: pbPlugin.WriteSpec_FORCE, + ChangeColumn: pbPlugin.WriteSpec_FORCE, } -var migrateStrategyAppend = destination.MigrateStrategy{ - AddColumn: specs.MigrateModeForced, - AddColumnNotNull: specs.MigrateModeForced, - RemoveColumn: specs.MigrateModeForced, - RemoveColumnNotNull: specs.MigrateModeForced, - ChangeColumn: specs.MigrateModeForced, +var migrateStrategyAppend = plugin.MigrateStrategy{ + AddColumn: pbPlugin.WriteSpec_FORCE, + AddColumnNotNull: pbPlugin.WriteSpec_FORCE, + RemoveColumn: pbPlugin.WriteSpec_FORCE, + RemoveColumnNotNull: pbPlugin.WriteSpec_FORCE, + ChangeColumn: pbPlugin.WriteSpec_FORCE, } func TestPluginUnmanagedClient(t *testing.T) { - destination.PluginTestSuiteRunner( + plugin.PluginTestSuiteRunner( t, - func() *destination.Plugin { - return destination.NewPlugin("test", "development", NewClient) + func() *plugin.Plugin { + return plugin.NewPlugin("test", "development", NewClient) }, - specs.Destination{}, - destination.PluginTestSuiteTests{ + pbPlugin.Spec{}, + plugin.PluginTestSuiteTests{ MigrateStrategyOverwrite: migrateStrategyOverwrite, MigrateStrategyAppend: migrateStrategyAppend, }, @@ -45,51 +45,55 @@ func TestPluginUnmanagedClient(t *testing.T) { } func TestPluginManagedClient(t *testing.T) { - destination.PluginTestSuiteRunner(t, - func() *destination.Plugin { - return destination.NewPlugin("test", "development", NewClient, destination.WithManagedWriter()) + plugin.PluginTestSuiteRunner(t, + func() *plugin.Plugin { + return plugin.NewPlugin("test", "development", NewClient, plugin.WithManagedWriter()) }, - specs.Destination{}, - destination.PluginTestSuiteTests{ + pbPlugin.Spec{}, + plugin.PluginTestSuiteTests{ MigrateStrategyOverwrite: migrateStrategyOverwrite, MigrateStrategyAppend: migrateStrategyAppend, }) } func TestPluginManagedClientWithSmallBatchSize(t *testing.T) { - destination.PluginTestSuiteRunner(t, - func() *destination.Plugin { - return destination.NewPlugin("test", "development", NewClient, destination.WithManagedWriter(), - destination.WithDefaultBatchSize(1), - destination.WithDefaultBatchSizeBytes(1)) - }, specs.Destination{}, - destination.PluginTestSuiteTests{ + plugin.PluginTestSuiteRunner(t, + func() *plugin.Plugin { + return plugin.NewPlugin("test", "development", NewClient, plugin.WithManagedWriter(), + plugin.WithDefaultBatchSize(1), + plugin.WithDefaultBatchSizeBytes(1)) + }, pbPlugin.Spec{}, + plugin.PluginTestSuiteTests{ MigrateStrategyOverwrite: migrateStrategyOverwrite, MigrateStrategyAppend: migrateStrategyAppend, }) } func TestPluginManagedClientWithLargeBatchSize(t *testing.T) { - destination.PluginTestSuiteRunner(t, - func() *destination.Plugin { - return destination.NewPlugin("test", "development", NewClient, destination.WithManagedWriter(), - destination.WithDefaultBatchSize(100000000), - destination.WithDefaultBatchSizeBytes(100000000)) + plugin.PluginTestSuiteRunner(t, + func() *plugin.Plugin { + return plugin.NewPlugin("test", "development", NewClient, plugin.WithManagedWriter(), + plugin.WithDefaultBatchSize(100000000), + plugin.WithDefaultBatchSizeBytes(100000000)) }, - specs.Destination{}, - destination.PluginTestSuiteTests{ + pbPlugin.Spec{}, + plugin.PluginTestSuiteTests{ MigrateStrategyOverwrite: migrateStrategyOverwrite, MigrateStrategyAppend: migrateStrategyAppend, }) } func TestPluginManagedClientWithCQPKs(t *testing.T) { - destination.PluginTestSuiteRunner(t, - func() *destination.Plugin { - return destination.NewPlugin("test", "development", NewClient) + plugin.PluginTestSuiteRunner(t, + func() *plugin.Plugin { + return plugin.NewPlugin("test", "development", NewClient) }, - specs.Destination{PKMode: specs.PKModeCQID}, - destination.PluginTestSuiteTests{ + pbPlugin.Spec{ + WriteSpec: &pbPlugin.WriteSpec{ + PkMode: pbPlugin.WriteSpec_CQ_ID_ONLY, + }, + }, + plugin.PluginTestSuiteTests{ MigrateStrategyOverwrite: migrateStrategyOverwrite, MigrateStrategyAppend: migrateStrategyAppend, }) @@ -97,8 +101,8 @@ func TestPluginManagedClientWithCQPKs(t *testing.T) { func TestPluginOnNewError(t *testing.T) { ctx := context.Background() - p := destination.NewPlugin("test", "development", NewClientErrOnNew) - err := p.Init(ctx, getTestLogger(t), specs.Destination{}) + p := plugin.NewPlugin("test", "development", NewClientErrOnNew) + err := p.Init(ctx, pbPlugin.Spec{}) if err == nil { t.Fatal("expected error") @@ -108,8 +112,8 @@ func TestPluginOnNewError(t *testing.T) { func TestOnWriteError(t *testing.T) { ctx := context.Background() newClientFunc := GetNewClient(WithErrOnWrite()) - p := destination.NewPlugin("test", "development", newClientFunc) - if err := p.Init(ctx, getTestLogger(t), specs.Destination{}); err != nil { + p := plugin.NewPlugin("test", "development", newClientFunc) + if err := p.Init(ctx, pbPlugin.Spec{}); err != nil { t.Fatal(err) } table := schema.TestTable("test", schema.TestSourceOptions{}) @@ -118,7 +122,7 @@ func TestOnWriteError(t *testing.T) { } sourceName := "TestDestinationOnWriteError" syncTime := time.Now() - sourceSpec := specs.Source{ + sourceSpec := pbPlugin.Spec{ Name: sourceName, } ch := make(chan arrow.Record, 1) @@ -143,8 +147,8 @@ func TestOnWriteError(t *testing.T) { func TestOnWriteCtxCancelled(t *testing.T) { ctx := context.Background() newClientFunc := GetNewClient(WithBlockingWrite()) - p := destination.NewPlugin("test", "development", newClientFunc) - if err := p.Init(ctx, getTestLogger(t), specs.Destination{}); err != nil { + p := plugin.NewPlugin("test", "development", newClientFunc) + if err := p.Init(ctx, pbPlugin.Spec{}); err != nil { t.Fatal(err) } table := schema.TestTable("test", schema.TestSourceOptions{}) @@ -153,7 +157,7 @@ func TestOnWriteCtxCancelled(t *testing.T) { } sourceName := "TestDestinationOnWriteError" syncTime := time.Now() - sourceSpec := specs.Source{ + sourceSpec := pbPlugin.Spec{ Name: sourceName, } ch := make(chan arrow.Record, 1) @@ -180,22 +184,22 @@ func TestPluginInit(t *testing.T) { ) var ( - batchSizeObserved int - batchSizeBytesObserved int + batchSizeObserved uint64 + batchSizeBytesObserved uint64 ) - p := destination.NewPlugin( + p := plugin.NewPlugin( "test", "development", - func(ctx context.Context, logger zerolog.Logger, s specs.Destination) (destination.Client, error) { - batchSizeObserved = s.BatchSize - batchSizeBytesObserved = s.BatchSizeBytes + func(ctx context.Context, logger zerolog.Logger, s pbPlugin.Spec) (plugin.Client, error) { + batchSizeObserved = s.WriteSpec.BatchSize + batchSizeBytesObserved = s.WriteSpec.BatchSizeBytes return NewClient(ctx, logger, s) }, - destination.WithDefaultBatchSize(batchSize), - destination.WithDefaultBatchSizeBytes(batchSizeBytes), + plugin.WithDefaultBatchSize(batchSize), + plugin.WithDefaultBatchSizeBytes(batchSizeBytes), ) - require.NoError(t, p.Init(context.TODO(), getTestLogger(t), specs.Destination{})) + require.NoError(t, p.Init(context.TODO(), pbPlugin.Spec{})) require.Equal(t, batchSize, batchSizeObserved) require.Equal(t, batchSizeBytes, batchSizeBytesObserved) -} +} \ No newline at end of file diff --git a/internal/pk/pk.go b/internal/pk/pk.go index 22b2b277db..ca8c5f2806 100644 --- a/internal/pk/pk.go +++ b/internal/pk/pk.go @@ -4,7 +4,7 @@ import ( "strings" "github.com/apache/arrow/go/v13/arrow" - "github.com/cloudquery/plugin-sdk/v3/schema" + "github.com/cloudquery/plugin-sdk/v4/schema" ) func String(resource arrow.Record) string { diff --git a/internal/servers/destination/v0/destinations.go b/internal/servers/destination/v0/destinations.go index c09b242e4c..c315bd0652 100644 --- a/internal/servers/destination/v0/destinations.go +++ b/internal/servers/destination/v0/destinations.go @@ -12,8 +12,8 @@ import ( pb "github.com/cloudquery/plugin-pb-go/pb/destination/v0" "github.com/cloudquery/plugin-pb-go/specs" schemav2 "github.com/cloudquery/plugin-sdk/v2/schema" - "github.com/cloudquery/plugin-sdk/v3/plugins/destination" - "github.com/cloudquery/plugin-sdk/v3/schema" + "github.com/cloudquery/plugin-sdk/v4/plugin" + "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/rs/zerolog" "golang.org/x/sync/errgroup" "google.golang.org/grpc/codes" @@ -22,7 +22,7 @@ import ( type Server struct { pb.UnimplementedDestinationServer - Plugin *destination.Plugin + Plugin *plugin.Plugin Logger zerolog.Logger spec specs.Destination } @@ -39,7 +39,8 @@ func (s *Server) Configure(ctx context.Context, req *pbBase.Configure_Request) ( return nil, status.Errorf(codes.InvalidArgument, "failed to unmarshal spec: %v", err) } s.spec = spec - return &pbBase.Configure_Response{}, s.Plugin.Init(ctx, s.Logger, spec) + specV3 := SpecV1ToV3(spec) + return &pbBase.Configure_Response{}, s.Plugin.Init(ctx, specV3) } func (s *Server) GetName(context.Context, *pbBase.GetName_Request) (*pbBase.GetName_Response, error) { @@ -102,8 +103,9 @@ func (s *Server) Write2(msg pb.Destination_Write2Server) error { SetDestinationManagedCqColumns(tables) s.setPKsForTables(tables) eg, ctx := errgroup.WithContext(msg.Context()) + sourceSpecV3 := SourceSpecV1ToV3(sourceSpec) eg.Go(func() error { - return s.Plugin.Write(ctx, sourceSpec, tables, syncTime, resources) + return s.Plugin.Write(ctx, sourceSpecV3, tables, syncTime, resources) }) sourceColumn := &schemav2.Text{} _ = sourceColumn.Set(sourceSpec.Name) diff --git a/internal/servers/destination/v0/schemav2tov3.go b/internal/servers/destination/v0/schemav2tov3.go index eabd37fd94..3b63448b15 100644 --- a/internal/servers/destination/v0/schemav2tov3.go +++ b/internal/servers/destination/v0/schemav2tov3.go @@ -8,8 +8,8 @@ import ( "github.com/apache/arrow/go/v13/arrow/array" "github.com/apache/arrow/go/v13/arrow/memory" schemav2 "github.com/cloudquery/plugin-sdk/v2/schema" - "github.com/cloudquery/plugin-sdk/v3/schema" - "github.com/cloudquery/plugin-sdk/v3/types" + "github.com/cloudquery/plugin-sdk/v4/schema" + "github.com/cloudquery/plugin-sdk/v4/types" ) func TablesV2ToV3(tables schemav2.Tables) schema.Tables { diff --git a/internal/servers/destination/v0/specv3tov1.go b/internal/servers/destination/v0/specv3tov1.go new file mode 100644 index 0000000000..31ab4fb5de --- /dev/null +++ b/internal/servers/destination/v0/specv3tov1.go @@ -0,0 +1,77 @@ +package destination + +import ( + pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" + "github.com/cloudquery/plugin-pb-go/specs" +) + +func SourceSpecV1ToV3(spec specs.Source) pbPlugin.Spec { + newSpec := pbPlugin.Spec{ + Name: spec.Name, + Version: spec.Version, + Path: spec.Path, + SyncSpec: &pbPlugin.SyncSpec{ + Tables: spec.Tables, + SkipTables: spec.SkipTables, + Destinations: spec.Destinations, + Concurrency: uint64(spec.Concurrency), + DetrministicCqId: spec.DeterministicCQID, + }, + } + switch spec.Scheduler { + case specs.SchedulerDFS: + newSpec.SyncSpec.Scheduler = pbPlugin.SyncSpec_SCHEDULER_DFS + case specs.SchedulerRoundRobin: + newSpec.SyncSpec.Scheduler = pbPlugin.SyncSpec_SCHEDULER_ROUND_ROBIN + default: + panic("invalid scheduler " + spec.Scheduler.String()) + } + return newSpec +} + +func SpecV1ToV3(spec specs.Destination) pbPlugin.Spec { + newSpec := pbPlugin.Spec{ + Name: spec.Name, + Version: spec.Version, + Path: spec.Path, + WriteSpec: &pbPlugin.WriteSpec{ + BatchSize: uint64(spec.BatchSize), + BatchSizeBytes: uint64(spec.BatchSizeBytes), + }, + } + switch spec.Registry { + case specs.RegistryGithub: + newSpec.Registry = pbPlugin.Spec_REGISTRY_GITHUB + case specs.RegistryGrpc: + newSpec.Registry = pbPlugin.Spec_REGISTRY_GRPC + case specs.RegistryLocal: + newSpec.Registry = pbPlugin.Spec_REGISTRY_LOCAL + default: + panic("invalid registry " + spec.Registry.String()) + } + switch spec.WriteMode { + case specs.WriteModeAppend: + newSpec.WriteSpec.WriteMode = pbPlugin.WRITE_MODE_WRITE_MODE_APPEND + case specs.WriteModeOverwrite: + newSpec.WriteSpec.WriteMode = pbPlugin.WRITE_MODE_WRITE_MODE_OVERWRITE + case specs.WriteModeOverwriteDeleteStale: + newSpec.WriteSpec.WriteMode = pbPlugin.WRITE_MODE_WRITE_MODE_OVERWRITE_DELETE_STALE + default: + panic("invalid write mode " + spec.WriteMode.String()) + } + switch spec.PKMode { + case specs.PKModeDefaultKeys: + newSpec.WriteSpec.PkMode = pbPlugin.WriteSpec_DEFAULT + case specs.PKModeCQID: + newSpec.WriteSpec.PkMode = pbPlugin.WriteSpec_CQ_ID_ONLY + } + switch spec.MigrateMode { + case specs.MigrateModeSafe: + newSpec.WriteSpec.MigrateMode = pbPlugin.WriteSpec_SAFE + case specs.MigrateModeForced: + newSpec.WriteSpec.MigrateMode = pbPlugin.WriteSpec_FORCE + default: + panic("invalid migrate mode " + spec.MigrateMode.String()) + } + return newSpec +} \ No newline at end of file diff --git a/internal/servers/destination/v1/destinations.go b/internal/servers/destination/v1/destinations.go index 447c03b596..4748c3c947 100644 --- a/internal/servers/destination/v1/destinations.go +++ b/internal/servers/destination/v1/destinations.go @@ -11,8 +11,8 @@ import ( "github.com/apache/arrow/go/v13/arrow/ipc" pb "github.com/cloudquery/plugin-pb-go/pb/destination/v1" "github.com/cloudquery/plugin-pb-go/specs" - "github.com/cloudquery/plugin-sdk/v3/plugins/destination" - "github.com/cloudquery/plugin-sdk/v3/schema" + "github.com/cloudquery/plugin-sdk/v4/plugin" + "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/rs/zerolog" "golang.org/x/sync/errgroup" "google.golang.org/grpc/codes" @@ -21,7 +21,7 @@ import ( type Server struct { pb.UnimplementedDestinationServer - Plugin *destination.Plugin + Plugin *plugin.Plugin Logger zerolog.Logger spec specs.Destination } @@ -32,7 +32,8 @@ func (s *Server) Configure(ctx context.Context, req *pb.Configure_Request) (*pb. return nil, status.Errorf(codes.InvalidArgument, "failed to unmarshal spec: %v", err) } s.spec = spec - return &pb.Configure_Response{}, s.Plugin.Init(ctx, s.Logger, spec) + specV3 := SpecV1ToV3(spec) + return &pb.Configure_Response{}, s.Plugin.Init(ctx, specV3) } func (s *Server) GetName(context.Context, *pb.GetName_Request) (*pb.GetName_Response, error) { @@ -96,8 +97,9 @@ func (s *Server) Write(msg pb.Destination_WriteServer) error { syncTime := r.Timestamp.AsTime() s.setPKsForTables(tables) eg, ctx := errgroup.WithContext(msg.Context()) + sourceSpecV3 := SourceSpecV1ToV3(sourceSpec) eg.Go(func() error { - return s.Plugin.Write(ctx, sourceSpec, tables, syncTime, resources) + return s.Plugin.Write(ctx, sourceSpecV3, tables, syncTime, resources) }) for { diff --git a/internal/servers/destination/v1/specv3tov1.go b/internal/servers/destination/v1/specv3tov1.go new file mode 100644 index 0000000000..31ab4fb5de --- /dev/null +++ b/internal/servers/destination/v1/specv3tov1.go @@ -0,0 +1,77 @@ +package destination + +import ( + pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" + "github.com/cloudquery/plugin-pb-go/specs" +) + +func SourceSpecV1ToV3(spec specs.Source) pbPlugin.Spec { + newSpec := pbPlugin.Spec{ + Name: spec.Name, + Version: spec.Version, + Path: spec.Path, + SyncSpec: &pbPlugin.SyncSpec{ + Tables: spec.Tables, + SkipTables: spec.SkipTables, + Destinations: spec.Destinations, + Concurrency: uint64(spec.Concurrency), + DetrministicCqId: spec.DeterministicCQID, + }, + } + switch spec.Scheduler { + case specs.SchedulerDFS: + newSpec.SyncSpec.Scheduler = pbPlugin.SyncSpec_SCHEDULER_DFS + case specs.SchedulerRoundRobin: + newSpec.SyncSpec.Scheduler = pbPlugin.SyncSpec_SCHEDULER_ROUND_ROBIN + default: + panic("invalid scheduler " + spec.Scheduler.String()) + } + return newSpec +} + +func SpecV1ToV3(spec specs.Destination) pbPlugin.Spec { + newSpec := pbPlugin.Spec{ + Name: spec.Name, + Version: spec.Version, + Path: spec.Path, + WriteSpec: &pbPlugin.WriteSpec{ + BatchSize: uint64(spec.BatchSize), + BatchSizeBytes: uint64(spec.BatchSizeBytes), + }, + } + switch spec.Registry { + case specs.RegistryGithub: + newSpec.Registry = pbPlugin.Spec_REGISTRY_GITHUB + case specs.RegistryGrpc: + newSpec.Registry = pbPlugin.Spec_REGISTRY_GRPC + case specs.RegistryLocal: + newSpec.Registry = pbPlugin.Spec_REGISTRY_LOCAL + default: + panic("invalid registry " + spec.Registry.String()) + } + switch spec.WriteMode { + case specs.WriteModeAppend: + newSpec.WriteSpec.WriteMode = pbPlugin.WRITE_MODE_WRITE_MODE_APPEND + case specs.WriteModeOverwrite: + newSpec.WriteSpec.WriteMode = pbPlugin.WRITE_MODE_WRITE_MODE_OVERWRITE + case specs.WriteModeOverwriteDeleteStale: + newSpec.WriteSpec.WriteMode = pbPlugin.WRITE_MODE_WRITE_MODE_OVERWRITE_DELETE_STALE + default: + panic("invalid write mode " + spec.WriteMode.String()) + } + switch spec.PKMode { + case specs.PKModeDefaultKeys: + newSpec.WriteSpec.PkMode = pbPlugin.WriteSpec_DEFAULT + case specs.PKModeCQID: + newSpec.WriteSpec.PkMode = pbPlugin.WriteSpec_CQ_ID_ONLY + } + switch spec.MigrateMode { + case specs.MigrateModeSafe: + newSpec.WriteSpec.MigrateMode = pbPlugin.WriteSpec_SAFE + case specs.MigrateModeForced: + newSpec.WriteSpec.MigrateMode = pbPlugin.WriteSpec_FORCE + default: + panic("invalid migrate mode " + spec.MigrateMode.String()) + } + return newSpec +} \ No newline at end of file diff --git a/internal/servers/plugin/v0/plugin.go b/internal/servers/plugin/v3/plugin.go similarity index 87% rename from internal/servers/plugin/v0/plugin.go rename to internal/servers/plugin/v3/plugin.go index d00b16059c..8a117bee9f 100644 --- a/internal/servers/plugin/v0/plugin.go +++ b/internal/servers/plugin/v3/plugin.go @@ -12,14 +12,10 @@ import ( "path/filepath" "github.com/apache/arrow/go/v13/arrow" - "github.com/apache/arrow/go/v13/arrow/array" "github.com/apache/arrow/go/v13/arrow/ipc" - "github.com/apache/arrow/go/v13/arrow/memory" - pb "github.com/cloudquery/plugin-pb-go/pb/plugin/v0" - "github.com/cloudquery/plugin-sdk/v3/plugin" - "github.com/cloudquery/plugin-sdk/v3/plugins/source" - "github.com/cloudquery/plugin-sdk/v3/scalar" - "github.com/cloudquery/plugin-sdk/v3/schema" + pb "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" + "github.com/cloudquery/plugin-sdk/v4/plugin" + "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/getsentry/sentry-go" "github.com/rs/zerolog" "golang.org/x/sync/errgroup" @@ -34,7 +30,7 @@ type Server struct { pb.UnimplementedPluginServer Plugin *plugin.Plugin Logger zerolog.Logger - spec pb.Spec + spec pb.Spec } func (s *Server) GetStaticTables(context.Context, *pb.GetStaticTables_Request) (*pb.GetStaticTables_Response, error) { @@ -81,23 +77,19 @@ func (s *Server) Init(ctx context.Context, req *pb.Init_Request) (*pb.Init_Respo } func (s *Server) Sync(req *pb.Sync_Request, stream pb.Plugin_SyncServer) error { - resources := make(chan *schema.Resource) + records := make(chan arrow.Record) var syncErr error ctx := stream.Context() go func() { - defer close(resources) - err := s.Plugin.Sync(ctx, req.SyncTime.AsTime(), *req.SyncSpec, resources) + defer close(records) + err := s.Plugin.Sync(ctx, req.SyncTime.AsTime(), *req.SyncSpec, records) if err != nil { - syncErr = fmt.Errorf("failed to sync resources: %w", err) + syncErr = fmt.Errorf("failed to sync records: %w", err) } }() - for resource := range resources { - vector := resource.GetValues() - bldr := array.NewRecordBuilder(memory.DefaultAllocator, resource.Table.ToArrowSchema()) - scalar.AppendToRecordBuilder(bldr, vector) - rec := bldr.NewRecord() + for rec := range records { var buf bytes.Buffer w := ipc.NewWriter(&buf, ipc.WithSchema(rec.Schema())) @@ -111,9 +103,11 @@ func (s *Server) Sync(req *pb.Sync_Request, stream pb.Plugin_SyncServer) error { msg := &pb.Sync_Response{ Resource: buf.Bytes(), } - err := checkMessageSize(msg, resource) + err := checkMessageSize(msg, rec) if err != nil { - s.Logger.Warn().Str("table", resource.Table.Name). + sc := rec.Schema() + tName, _ := sc.Metadata().GetValue(schema.MetadataTableName) + s.Logger.Warn().Str("table", tName). Int("bytes", len(msg.String())). Msg("Row exceeding max bytes ignored") continue @@ -130,7 +124,7 @@ func (s *Server) GetMetrics(context.Context, *pb.GetMetrics_Request) (*pb.GetMet // Aggregate metrics before sending to keep response size small. // Temporary fix for https://github.com/cloudquery/cloudquery/issues/3962 m := s.Plugin.Metrics() - agg := &source.TableClientMetrics{} + agg := &plugin.TableClientMetrics{} for _, table := range m.TableClient { for _, tableClient := range table { agg.Resources += tableClient.Resources @@ -138,8 +132,8 @@ func (s *Server) GetMetrics(context.Context, *pb.GetMetrics_Request) (*pb.GetMet agg.Panics += tableClient.Panics } } - b, err := json.Marshal(&source.Metrics{ - TableClient: map[string]map[string]*source.TableClientMetrics{"": {"": agg}}, + b, err := json.Marshal(&plugin.Metrics{ + TableClient: map[string]map[string]*plugin.TableClientMetrics{"": {"": agg}}, }) if err != nil { return nil, fmt.Errorf("failed to marshal source metrics: %w", err) @@ -255,7 +249,7 @@ func (s *Server) GenDocs(req *pb.GenDocs_Request, srv pb.Plugin_GenDocsServer) e } if err := srv.Send(&pb.GenDocs_Response{ Filename: f.Name(), - Content: content, + Content: content, }); err != nil { return fmt.Errorf("failed to send file: %w", err) } @@ -263,12 +257,14 @@ func (s *Server) GenDocs(req *pb.GenDocs_Request, srv pb.Plugin_GenDocsServer) e return nil } -func checkMessageSize(msg proto.Message, resource *schema.Resource) error { +func checkMessageSize(msg proto.Message, record arrow.Record) error { size := proto.Size(msg) // log error to Sentry if row exceeds half of the max size if size > MaxMsgSize/2 { + sc := record.Schema() + tName, _ := sc.Metadata().GetValue(schema.MetadataTableName) sentry.WithScope(func(scope *sentry.Scope) { - scope.SetTag("table", resource.Table.Name) + scope.SetTag("table", tName) scope.SetExtra("bytes", size) sentry.CurrentHub().CaptureMessage("Large message detected") }) @@ -292,4 +288,4 @@ func setCQIDAsPrimaryKeysForTables(tables schema.Tables) { } setCQIDAsPrimaryKeysForTables(table.Relations) } -} \ No newline at end of file +} diff --git a/internal/servers/source/v2/source.go b/internal/servers/source/v2/source.go deleted file mode 100644 index a010fefef3..0000000000 --- a/internal/servers/source/v2/source.go +++ /dev/null @@ -1,173 +0,0 @@ -package source - -import ( - "bytes" - "context" - "encoding/json" - "errors" - "fmt" - - "github.com/apache/arrow/go/v13/arrow/array" - "github.com/apache/arrow/go/v13/arrow/ipc" - "github.com/apache/arrow/go/v13/arrow/memory" - pb "github.com/cloudquery/plugin-pb-go/pb/source/v2" - "github.com/cloudquery/plugin-pb-go/specs" - "github.com/cloudquery/plugin-sdk/v3/plugins/source" - "github.com/cloudquery/plugin-sdk/v3/scalar" - "github.com/cloudquery/plugin-sdk/v3/schema" - "github.com/getsentry/sentry-go" - "github.com/rs/zerolog" - "google.golang.org/grpc/codes" - "google.golang.org/grpc/status" - "google.golang.org/protobuf/proto" -) - -const MaxMsgSize = 100 * 1024 * 1024 // 100 MiB - -type Server struct { - pb.UnimplementedSourceServer - Plugin *source.Plugin - Logger zerolog.Logger -} - -func (s *Server) GetTables(context.Context, *pb.GetTables_Request) (*pb.GetTables_Response, error) { - tables := s.Plugin.Tables().ToArrowSchemas() - encoded, err := tables.Encode() - if err != nil { - return nil, fmt.Errorf("failed to encode tables: %w", err) - } - return &pb.GetTables_Response{ - Tables: encoded, - }, nil -} - -func (s *Server) GetDynamicTables(context.Context, *pb.GetDynamicTables_Request) (*pb.GetDynamicTables_Response, error) { - tables := s.Plugin.GetDynamicTables().ToArrowSchemas() - encoded, err := tables.Encode() - if err != nil { - return nil, fmt.Errorf("failed to encode tables: %w", err) - } - return &pb.GetDynamicTables_Response{ - Tables: encoded, - }, nil -} - -func (s *Server) GetName(context.Context, *pb.GetName_Request) (*pb.GetName_Response, error) { - return &pb.GetName_Response{ - Name: s.Plugin.Name(), - }, nil -} - -func (s *Server) GetVersion(context.Context, *pb.GetVersion_Request) (*pb.GetVersion_Response, error) { - return &pb.GetVersion_Response{ - Version: s.Plugin.Version(), - }, nil -} - -func (s *Server) Init(ctx context.Context, req *pb.Init_Request) (*pb.Init_Response, error) { - var spec specs.Source - dec := json.NewDecoder(bytes.NewReader(req.Spec)) - dec.UseNumber() - // TODO: warn about unknown fields - if err := dec.Decode(&spec); err != nil { - return nil, status.Errorf(codes.InvalidArgument, "failed to decode spec: %v", err) - } - - if err := s.Plugin.Init(ctx, spec); err != nil { - return nil, status.Errorf(codes.Internal, "failed to init plugin: %v", err) - } - return &pb.Init_Response{}, nil -} - -func (s *Server) Sync(req *pb.Sync_Request, stream pb.Source_SyncServer) error { - resources := make(chan *schema.Resource) - var syncErr error - ctx := stream.Context() - - go func() { - defer close(resources) - err := s.Plugin.Sync(ctx, req.SyncTime.AsTime(), resources) - if err != nil { - syncErr = fmt.Errorf("failed to sync resources: %w", err) - } - }() - - for resource := range resources { - vector := resource.GetValues() - bldr := array.NewRecordBuilder(memory.DefaultAllocator, resource.Table.ToArrowSchema()) - scalar.AppendToRecordBuilder(bldr, vector) - rec := bldr.NewRecord() - - var buf bytes.Buffer - w := ipc.NewWriter(&buf, ipc.WithSchema(rec.Schema())) - if err := w.Write(rec); err != nil { - return status.Errorf(codes.Internal, "failed to write record: %v", err) - } - if err := w.Close(); err != nil { - return status.Errorf(codes.Internal, "failed to close writer: %v", err) - } - - msg := &pb.Sync_Response{ - Resource: buf.Bytes(), - } - err := checkMessageSize(msg, resource) - if err != nil { - s.Logger.Warn().Str("table", resource.Table.Name). - Int("bytes", len(msg.String())). - Msg("Row exceeding max bytes ignored") - continue - } - if err := stream.Send(msg); err != nil { - return status.Errorf(codes.Internal, "failed to send resource: %v", err) - } - } - - return syncErr -} - -func (s *Server) GetMetrics(context.Context, *pb.GetMetrics_Request) (*pb.GetMetrics_Response, error) { - // Aggregate metrics before sending to keep response size small. - // Temporary fix for https://github.com/cloudquery/cloudquery/issues/3962 - m := s.Plugin.Metrics() - agg := &source.TableClientMetrics{} - for _, table := range m.TableClient { - for _, tableClient := range table { - agg.Resources += tableClient.Resources - agg.Errors += tableClient.Errors - agg.Panics += tableClient.Panics - } - } - b, err := json.Marshal(&source.Metrics{ - TableClient: map[string]map[string]*source.TableClientMetrics{"": {"": agg}}, - }) - if err != nil { - return nil, fmt.Errorf("failed to marshal source metrics: %w", err) - } - return &pb.GetMetrics_Response{ - Metrics: b, - }, nil -} - -func (s *Server) GenDocs(_ context.Context, req *pb.GenDocs_Request) (*pb.GenDocs_Response, error) { - err := s.Plugin.GeneratePluginDocs(req.Path, req.Format.String()) - if err != nil { - return nil, fmt.Errorf("failed to generate docs: %w", err) - } - return &pb.GenDocs_Response{}, nil -} - -func checkMessageSize(msg proto.Message, resource *schema.Resource) error { - size := proto.Size(msg) - // log error to Sentry if row exceeds half of the max size - if size > MaxMsgSize/2 { - sentry.WithScope(func(scope *sentry.Scope) { - scope.SetTag("table", resource.Table.Name) - scope.SetExtra("bytes", size) - sentry.CurrentHub().CaptureMessage("Large message detected") - }) - } - if size > MaxMsgSize { - return errors.New("message exceeds max size") - } - return nil -} diff --git a/plugin/benchmark_test.go b/plugin/benchmark_test.go.backup similarity index 99% rename from plugin/benchmark_test.go rename to plugin/benchmark_test.go.backup index 36a86cd3cd..a1bf87d5a8 100644 --- a/plugin/benchmark_test.go +++ b/plugin/benchmark_test.go.backup @@ -11,7 +11,7 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/cloudquery/plugin-pb-go/specs" - "github.com/cloudquery/plugin-sdk/v3/schema" + "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/rs/zerolog" "golang.org/x/sync/errgroup" ) diff --git a/plugins/destination/diff.go b/plugin/diff.go similarity index 100% rename from plugins/destination/diff.go rename to plugin/diff.go diff --git a/plugin/docs.go b/plugin/docs.go index 5827e5edcf..e66bf7ebb2 100644 --- a/plugin/docs.go +++ b/plugin/docs.go @@ -11,9 +11,9 @@ import ( "sort" "text/template" - pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v0" - "github.com/cloudquery/plugin-sdk/v3/caser" - "github.com/cloudquery/plugin-sdk/v3/schema" + pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" + "github.com/cloudquery/plugin-sdk/v4/caser" + "github.com/cloudquery/plugin-sdk/v4/schema" ) //go:embed templates/*.go.tpl diff --git a/plugin/docs_test.go b/plugin/docs_test.go.backup similarity index 97% rename from plugin/docs_test.go rename to plugin/docs_test.go.backup index 44e7b34afd..06f271f9fd 100644 --- a/plugin/docs_test.go +++ b/plugin/docs_test.go.backup @@ -9,8 +9,8 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/bradleyjkemp/cupaloy/v2" - "github.com/cloudquery/plugin-sdk/v3/schema" - "github.com/cloudquery/plugin-sdk/v3/types" + "github.com/cloudquery/plugin-sdk/v4/schema" + "github.com/cloudquery/plugin-sdk/v4/types" "github.com/stretchr/testify/require" ) diff --git a/plugins/destination/managed_writer.go b/plugin/managed_writer.go similarity index 92% rename from plugins/destination/managed_writer.go rename to plugin/managed_writer.go index 0d00f14bc3..74092f785e 100644 --- a/plugins/destination/managed_writer.go +++ b/plugin/managed_writer.go @@ -1,17 +1,16 @@ -package destination +package plugin import ( "context" "fmt" "sync" - "sync/atomic" "time" "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/util" "github.com/cloudquery/plugin-pb-go/specs" - "github.com/cloudquery/plugin-sdk/v3/internal/pk" - "github.com/cloudquery/plugin-sdk/v3/schema" + "github.com/cloudquery/plugin-sdk/v4/internal/pk" + "github.com/cloudquery/plugin-sdk/v4/schema" ) type worker struct { @@ -33,7 +32,7 @@ func (p *Plugin) worker(ctx context.Context, metrics *Metrics, table *schema.Tab } return } - if len(resources) == p.spec.BatchSize || sizeBytes+util.TotalRecordSize(r) > int64(p.spec.BatchSizeBytes) { + if uint64(len(resources)) == p.spec.WriteSpec.BatchSize || sizeBytes+util.TotalRecordSize(r) > int64(p.spec.WriteSpec.BatchSizeBytes) { p.flush(ctx, metrics, table, resources) resources = resources[:0] // allows for mem reuse sizeBytes = 0 @@ -67,10 +66,10 @@ func (p *Plugin) flush(ctx context.Context, metrics *Metrics, table *schema.Tabl if err := p.client.WriteTableBatch(ctx, table, resources); err != nil { p.logger.Err(err).Str("table", table.Name).Int("len", batchSize).Dur("duration", time.Since(start)).Msg("failed to write batch") // we don't return an error as we need to continue until channel is closed otherwise there will be a deadlock - atomic.AddUint64(&metrics.Errors, uint64(batchSize)) + // atomic.AddUint64(&metrics.Errors, uint64(batchSize)) } else { p.logger.Info().Str("table", table.Name).Int("len", batchSize).Dur("duration", time.Since(start)).Msg("batch written successfully") - atomic.AddUint64(&metrics.Writes, uint64(batchSize)) + // atomic.AddUint64(&metrics.Writes, uint64(batchSize)) } } @@ -166,4 +165,4 @@ func (p *Plugin) writeManagedTableBatch(ctx context.Context, _ specs.Source, tab } p.workersLock.Unlock() return nil -} +} \ No newline at end of file diff --git a/plugin/metrics.go b/plugin/metrics.go index 182bc243a4..8ba88823b9 100644 --- a/plugin/metrics.go +++ b/plugin/metrics.go @@ -4,7 +4,7 @@ import ( "sync/atomic" "time" - "github.com/cloudquery/plugin-sdk/v3/schema" + "github.com/cloudquery/plugin-sdk/v4/schema" ) type Metrics struct { diff --git a/plugins/destination/nulls.go b/plugin/nulls.go similarity index 94% rename from plugins/destination/nulls.go rename to plugin/nulls.go index 6f965106e4..12ad0facf7 100644 --- a/plugins/destination/nulls.go +++ b/plugin/nulls.go @@ -1,4 +1,4 @@ -package destination +package plugin import ( "github.com/apache/arrow/go/v13/arrow" @@ -69,4 +69,8 @@ func (f AllowNullFunc) replaceNullsByEmpty(records []arrow.Record) { } records[i] = array.NewRecord(records[i].Schema(), cols, records[i].NumRows()) } +<<<<<<< HEAD:plugins/destination/nulls.go } +======= +} +>>>>>>> 5ba1713 (wip):plugin/nulls.go diff --git a/plugin/options.go b/plugin/options.go index 1290b7cd56..d3104875e7 100644 --- a/plugin/options.go +++ b/plugin/options.go @@ -2,16 +2,17 @@ package plugin import ( "context" + "time" - "github.com/cloudquery/plugin-sdk/v3/schema" + "github.com/cloudquery/plugin-sdk/v4/schema" ) type GetTables func(ctx context.Context, c Client) (schema.Tables, error) type Option func(*Plugin) -// WithDynamicTableOption allows the plugin to return list of tables after call to New -func WithDynamicTableOption(getDynamicTables GetTables) Option { +// WithDynamicTable allows the plugin to return list of tables after call to New +func WithDynamicTable(getDynamicTables GetTables) Option { return func(p *Plugin) { p.getDynamicTables = getDynamicTables } @@ -38,9 +39,33 @@ func WithTitleTransformer(t func(*schema.Table) string) Option { } } - func WithStaticTables(tables schema.Tables) Option { return func(p *Plugin) { p.staticTables = tables } +} + + +func WithManagedWriter() Option { + return func(p *Plugin) { + p.managedWriter = true + } +} + +func WithBatchTimeout(seconds int) Option { + return func(p *Plugin) { + p.batchTimeout = time.Duration(seconds) * time.Second + } +} + +func WithDefaultBatchSize(defaultBatchSize int) Option { + return func(p *Plugin) { + p.defaultBatchSize = defaultBatchSize + } +} + +func WithDefaultBatchSizeBytes(defaultBatchSizeBytes int) Option { + return func(p *Plugin) { + p.defaultBatchSizeBytes = defaultBatchSizeBytes + } } \ No newline at end of file diff --git a/plugin/plugin.go b/plugin/plugin.go index e1efa19cb1..e812cf941c 100644 --- a/plugin/plugin.go +++ b/plugin/plugin.go @@ -7,14 +7,17 @@ import ( "time" "github.com/apache/arrow/go/v13/arrow" + "github.com/apache/arrow/go/v13/arrow/array" + "github.com/apache/arrow/go/v13/arrow/memory" "github.com/cloudquery/plugin-pb-go/specs" - "github.com/cloudquery/plugin-sdk/v3/backend" - "github.com/cloudquery/plugin-sdk/v3/caser" - "github.com/cloudquery/plugin-sdk/v3/schema" + "github.com/cloudquery/plugin-sdk/v4/backend" + "github.com/cloudquery/plugin-sdk/v4/caser" + "github.com/cloudquery/plugin-sdk/v4/scalar" + "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/rs/zerolog" "golang.org/x/sync/semaphore" - pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v0" + pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" ) type Options struct { @@ -25,28 +28,40 @@ type NewExecutionClientFunc func(context.Context, zerolog.Logger, specs.Source, type NewClientFunc func(context.Context, zerolog.Logger, pbPlugin.Spec) (Client, error) -type UnmanagedClient interface { - schema.ClientMeta - Sync(ctx context.Context, metrics *Metrics, syncSpec pbPlugin.SyncSpec, res chan<- *schema.Resource) error -} - type Client interface { - Sync(ctx context.Context, metrics *Metrics, res chan<- *schema.Resource) error + ID() string + Sync(ctx context.Context, metrics *Metrics, res chan<- arrow.Record) error Migrate(ctx context.Context, tables schema.Tables) error + WriteTableBatch(ctx context.Context, table *schema.Table, data []arrow.Record) error Write(ctx context.Context, tables schema.Tables, res <-chan arrow.Record) error DeleteStale(ctx context.Context, tables schema.Tables, sourceName string, syncTime time.Time) error Read(ctx context.Context, table *schema.Table, sourceName string, res chan<- arrow.Record) error + Close(ctx context.Context) error } type UnimplementedWriter struct{} -func (UnimplementedWriter) WriteTableBatch(context.Context, *schema.Table, []arrow.Record) error { +func (UnimplementedWriter) Migrate(ctx context.Context, tables schema.Tables) error { + return fmt.Errorf("not implemented") +} + +func (UnimplementedWriter) Write(ctx context.Context, tables schema.Tables, res <-chan arrow.Record) error { + return fmt.Errorf("not implemented") +} + +func (UnimplementedWriter) DeleteStale(ctx context.Context, tables schema.Tables, sourceName string, syncTime time.Time) error { return fmt.Errorf("not implemented") } type UnimplementedSync struct{} -func (UnimplementedSync) Sync(ctx context.Context, metrics *Metrics, res chan<- *schema.Resource) error { +func (UnimplementedSync) Sync(ctx context.Context, metrics *Metrics, res chan<- arrow.Record) error { + return fmt.Errorf("not implemented") +} + +type UnimplementedRead struct{} + +func (UnimplementedRead) Read(ctx context.Context, table *schema.Table, sourceName string, res chan<- arrow.Record) error { return fmt.Errorf("not implemented") } @@ -94,6 +109,14 @@ type Plugin struct { // titleTransformer allows the plugin to control how table names get turned into titles for generated documentation titleTransformer func(*schema.Table) string syncTime time.Time + + managedWriter bool + workers map[string]*worker + workersLock *sync.Mutex + + batchTimeout time.Duration + defaultBatchSize int + defaultBatchSizeBytes int } const ( @@ -168,10 +191,11 @@ func NewPlugin(name string, version string, newClient NewClientFunc, options ... p := Plugin{ name: name, version: version, - internalColumns: true, - caser: caser.New(), - titleTransformer: DefaultTitleTransformer, - newClient: newClient, + internalColumns: true, + caser: caser.New(), + titleTransformer: DefaultTitleTransformer, + newClient: newClient, + metrics: &Metrics{TableClient: make(map[string]map[string]*TableClientMetrics)}, } for _, opt := range options { opt(&p) @@ -204,7 +228,6 @@ func (p *Plugin) Version() string { return p.version } - func (p *Plugin) SetLogger(logger zerolog.Logger) { p.logger = logger.With().Str("module", p.name+"-src").Logger() } @@ -222,6 +245,21 @@ func (p *Plugin) DynamicTables() schema.Tables { return p.sessionTables } +func (p *Plugin) readAll(ctx context.Context, table *schema.Table, sourceName string) ([]arrow.Record, error) { + var readErr error + ch := make(chan arrow.Record) + go func() { + defer close(ch) + readErr = p.Read(ctx, table, sourceName, ch) + }() + // nolint:prealloc + var resources []arrow.Record + for resource := range ch { + resources = append(resources, resource) + } + return resources, readErr +} + func (p *Plugin) Read(ctx context.Context, table *schema.Table, sourceName string, res chan<- arrow.Record) error { return p.client.Read(ctx, table, sourceName, res) } @@ -243,6 +281,43 @@ func (p *Plugin) Init(ctx context.Context, spec pbPlugin.Spec) error { } p.spec = spec + tables := p.staticTables + if p.getDynamicTables != nil { + tables, err = p.getDynamicTables(ctx, p.client) + if err != nil { + return fmt.Errorf("failed to get dynamic tables: %w", err) + } + + tables, err = tables.FilterDfs(spec.SyncSpec.Tables, spec.SyncSpec.SkipTables, true) + if err != nil { + return fmt.Errorf("failed to filter tables: %w", err) + } + if len(tables) == 0 { + return fmt.Errorf("no tables to sync - please check your spec 'tables' and 'skip_tables' settings") + } + + setParents(tables, nil) + if err := transformTables(tables); err != nil { + return err + } + if p.internalColumns { + if err := p.addInternalColumns(tables); err != nil { + return err + } + } + + p.maxDepth = maxDepth(tables) + if p.maxDepth > maxAllowedDepth { + return fmt.Errorf("max depth of tables is %d, max allowed is %d", p.maxDepth, maxAllowedDepth) + } + } else { + tables, err = tables.FilterDfs(spec.SyncSpec.Tables, spec.SyncSpec.SkipTables, true) + if err != nil { + return fmt.Errorf("failed to filter tables: %w", err) + } + } + p.sessionTables = tables + return nil } @@ -254,6 +329,41 @@ func (p *Plugin) writeUnmanaged(ctx context.Context, _ specs.Source, tables sche return p.client.Write(ctx, tables, res) } +// this function is currently used mostly for testing so it's not a public api +func (p *Plugin) writeOne(ctx context.Context, sourceSpec pbPlugin.Spec, syncTime time.Time, resource arrow.Record) error { + resources := []arrow.Record{resource} + return p.writeAll(ctx, sourceSpec, syncTime, resources) +} + +// this function is currently used mostly for testing so it's not a public api +func (p *Plugin) writeAll(ctx context.Context, sourceSpec pbPlugin.Spec, syncTime time.Time, resources []arrow.Record) error { + ch := make(chan arrow.Record, len(resources)) + for _, resource := range resources { + ch <- resource + } + close(ch) + tables := make(schema.Tables, 0) + tableNames := make(map[string]struct{}) + for _, resource := range resources { + sc := resource.Schema() + tableMD := sc.Metadata() + name, found := tableMD.GetValue(schema.MetadataTableName) + if !found { + return fmt.Errorf("missing table name") + } + if _, ok := tableNames[name]; ok { + continue + } + table, err := schema.NewTableFromArrowSchema(resource.Schema()) + if err != nil { + return err + } + tables = append(tables, table) + tableNames[table.Name] = struct{}{} + } + return p.Write(ctx, sourceSpec, tables, syncTime, ch) +} + func (p *Plugin) Write(ctx context.Context, sourceSpec pbPlugin.Spec, tables schema.Tables, syncTime time.Time, res <-chan arrow.Record) error { syncTime = syncTime.UTC() if err := p.client.Write(ctx, tables, res); err != nil { @@ -281,8 +391,23 @@ func (p *Plugin) DeleteStale(ctx context.Context, tables schema.Tables, sourceNa return p.client.DeleteStale(ctx, tables, sourceName, syncTime) } +func (p *Plugin) syncAll(ctx context.Context, syncTime time.Time, syncSpec pbPlugin.SyncSpec) ([]arrow.Record, error) { + var err error + ch := make(chan arrow.Record) + go func() { + defer close(ch) + err = p.Sync(ctx, syncTime, syncSpec, ch) + }() + // nolint:prealloc + var resources []arrow.Record + for resource := range ch { + resources = append(resources, resource) + } + return resources, err +} + // Sync is syncing data from the requested tables in spec to the given channel -func (p *Plugin) Sync(ctx context.Context, syncTime time.Time, syncSpec pbPlugin.SyncSpec, res chan<- *schema.Resource) error { +func (p *Plugin) Sync(ctx context.Context, syncTime time.Time, syncSpec pbPlugin.SyncSpec, res chan<- arrow.Record) error { if !p.mu.TryLock() { return fmt.Errorf("plugin already in use") } @@ -291,18 +416,28 @@ func (p *Plugin) Sync(ctx context.Context, syncTime time.Time, syncSpec pbPlugin startTime := time.Now() if p.unmanaged { - unmanagedClient := p.client.(UnmanagedClient) - if err := unmanagedClient.Sync(ctx, p.metrics, syncSpec, res); err != nil { + if err := p.client.Sync(ctx, p.metrics, res); err != nil { return fmt.Errorf("failed to sync unmanaged client: %w", err) } } else { - switch syncSpec.Scheduler { - case pbPlugin.SyncSpec_SCHEDULER_DFS: - p.syncDfs(ctx, syncSpec, p.client, p.sessionTables, res) - case pbPlugin.SyncSpec_SCHEDULER_ROUND_ROBIN: - p.syncRoundRobin(ctx, syncSpec, p.client, p.sessionTables, res) - default: - return fmt.Errorf("unknown scheduler %s. Options are: %v", syncSpec.Scheduler, specs.AllSchedulers.String()) + resources := make(chan *schema.Resource) + go func() { + defer close(resources) + switch syncSpec.Scheduler { + case pbPlugin.SyncSpec_SCHEDULER_DFS: + p.syncDfs(ctx, syncSpec, p.client, p.sessionTables, resources) + case pbPlugin.SyncSpec_SCHEDULER_ROUND_ROBIN: + p.syncRoundRobin(ctx, syncSpec, p.client, p.sessionTables, resources) + default: + panic(fmt.Errorf("unknown scheduler %s. Options are: %v", syncSpec.Scheduler, specs.AllSchedulers.String())) + } + }() + for resource := range resources { + vector := resource.GetValues() + bldr := array.NewRecordBuilder(memory.DefaultAllocator, resource.Table.ToArrowSchema()) + scalar.AppendToRecordBuilder(bldr, vector) + rec := bldr.NewRecord() + res <- rec } } @@ -322,5 +457,5 @@ func (p *Plugin) Close(ctx context.Context) error { } p.backend = nil } - return nil + return p.client.Close(ctx) } diff --git a/plugins/source/plugin_test.go b/plugin/plugin_managed_source_test.go similarity index 76% rename from plugins/source/plugin_test.go rename to plugin/plugin_managed_source_test.go index 08b38da24d..159c7dd9c8 100644 --- a/plugins/source/plugin_test.go +++ b/plugin/plugin_managed_source_test.go @@ -1,23 +1,24 @@ -package source +package plugin import ( "context" + "fmt" "testing" "time" "github.com/apache/arrow/go/v13/arrow" - "github.com/cloudquery/plugin-pb-go/specs" - "github.com/cloudquery/plugin-sdk/v3/scalar" - "github.com/cloudquery/plugin-sdk/v3/schema" - "github.com/cloudquery/plugin-sdk/v3/transformers" + "github.com/apache/arrow/go/v13/arrow/array" + pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v0" + "github.com/cloudquery/plugin-sdk/v4/scalar" + "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/google/go-cmp/cmp" "github.com/google/uuid" "github.com/rs/zerolog" - "github.com/stretchr/testify/assert" - "golang.org/x/sync/errgroup" ) -type testExecutionClient struct{} +type testExecutionClient struct { + UnimplementedWriter +} var _ schema.ClientMeta = &testExecutionClient{} @@ -137,7 +138,19 @@ func (*testExecutionClient) ID() string { return "testExecutionClient" } -func newTestExecutionClient(context.Context, zerolog.Logger, specs.Source, Options) (schema.ClientMeta, error) { +func (*testExecutionClient) Close(context.Context) error { + return nil +} + +func (*testExecutionClient) Read(ctx context.Context, table *schema.Table, sourceName string, res chan<- arrow.Record) error { + return fmt.Errorf("not implemented") +} + +func (*testExecutionClient) Sync(ctx context.Context, metrics *Metrics, res chan<- arrow.Record) error { + return fmt.Errorf("not implemented") +} + +func newTestExecutionClient(context.Context, zerolog.Logger, pbPlugin.Spec) (Client, error) { return &testExecutionClient{}, nil } @@ -345,18 +358,18 @@ func (testRand) Read(p []byte) (n int, err error) { func TestSync(t *testing.T) { uuid.SetRand(testRand{}) - for _, scheduler := range specs.AllSchedulers { + for _, scheduler := range pbPlugin.SyncSpec_SCHEDULER_value { for _, tc := range syncTestCases { tc := tc tc.table = tc.table.Copy(nil) - t.Run(tc.table.Name+"_"+scheduler.String(), func(t *testing.T) { - testSyncTable(t, tc, scheduler, tc.deterministicCQID) + t.Run(tc.table.Name+"_"+pbPlugin.SyncSpec_SCHEDULER(scheduler).String(), func(t *testing.T) { + testSyncTable(t, tc, pbPlugin.SyncSpec_SCHEDULER(scheduler), tc.deterministicCQID) }) } } } -func testSyncTable(t *testing.T, tc syncTestCase, scheduler specs.Scheduler, deterministicCQID bool) { +func testSyncTable(t *testing.T, tc syncTestCase, scheduler pbPlugin.SyncSpec_SCHEDULER, deterministicCQID bool) { ctx := context.Background() tables := []*schema.Table{ tc.table, @@ -365,43 +378,43 @@ func testSyncTable(t *testing.T, tc syncTestCase, scheduler specs.Scheduler, det plugin := NewPlugin( "testSourcePlugin", "1.0.0", - tables, newTestExecutionClient, + WithStaticTables(tables), ) plugin.SetLogger(zerolog.New(zerolog.NewTestWriter(t))) - spec := specs.Source{ - Name: "testSource", - Path: "cloudquery/testSource", - Tables: []string{"*"}, - Version: "v1.0.0", - Destinations: []string{"test"}, - Concurrency: 1, // choose a very low value to check that we don't run into deadlocks - Scheduler: scheduler, - DeterministicCQID: deterministicCQID, + spec := pbPlugin.Spec{ + Name: "testSource", + Path: "cloudquery/testSource", + Version: "v1.0.0", + SyncSpec: &pbPlugin.SyncSpec{ + Tables: []string{"*"}, + Destinations: []string{"test"}, + Concurrency: 1, // choose a very low value to check that we don't run into deadlocks + Scheduler: scheduler, + DetrministicCqId: deterministicCQID, + }, } if err := plugin.Init(ctx, spec); err != nil { t.Fatal(err) } - resources := make(chan *schema.Resource) - g, ctx := errgroup.WithContext(ctx) - g.Go(func() error { - defer close(resources) - return plugin.Sync(ctx, - testSyncTime, - resources) - }) + records, err := plugin.syncAll(ctx, testSyncTime, *spec.SyncSpec) + if err != nil { + t.Fatal(err) + } var i int - for resource := range resources { + for _, record := range records { if tc.data == nil { - t.Fatalf("Unexpected resource %v", resource) + t.Fatalf("Unexpected resource %v", record) } if i >= len(tc.data) { t.Fatalf("expected %d resources. got %d", len(tc.data), i) } - if !resource.GetValues().Equal(tc.data[i]) { - t.Fatalf("expected at i=%d: %v. got %v", i, tc.data[i], resource.GetValues()) + rec := tc.data[i].ToArrowRecord(record.Schema()) + if !array.RecordEqual(rec, record) { + t.Fatal(RecordDiff(rec, record)) + // t.Fatalf("expected at i=%d: %v. got %v", i, tc.data[i], record) } i++ } @@ -413,9 +426,6 @@ func testSyncTable(t *testing.T, tc syncTestCase, scheduler specs.Scheduler, det if !tc.stats.Equal(stats) { t.Fatalf("unexpected stats: %v", cmp.Diff(tc.stats, stats)) } - if err := g.Wait(); err != nil { - t.Fatal(err) - } } func TestIgnoredColumns(t *testing.T) { @@ -440,31 +450,31 @@ var testTable struct { Quaternary string } -func TestNewPluginPrimaryKeys(t *testing.T) { - testTransforms := []struct { - transformerOptions []transformers.StructTransformerOption - resultKeys []string - }{ - { - transformerOptions: []transformers.StructTransformerOption{transformers.WithPrimaryKeys("PrimaryKey")}, - resultKeys: []string{"primary_key"}, - }, - { - transformerOptions: []transformers.StructTransformerOption{}, - resultKeys: []string{"_cq_id"}, - }, - } - for _, tc := range testTransforms { - tables := []*schema.Table{ - { - Name: "test_table", - Transform: transformers.TransformWithStruct( - &testTable, tc.transformerOptions..., - ), - }, - } - - plugin := NewPlugin("testSourcePlugin", "1.0.0", tables, newTestExecutionClient) - assert.Equal(t, tc.resultKeys, plugin.tables[0].PrimaryKeys()) - } -} +// func TestNewPluginPrimaryKeys(t *testing.T) { +// testTransforms := []struct { +// transformerOptions []transformers.StructTransformerOption +// resultKeys []string +// }{ +// { +// transformerOptions: []transformers.StructTransformerOption{transformers.WithPrimaryKeys("PrimaryKey")}, +// resultKeys: []string{"primary_key"}, +// }, +// { +// transformerOptions: []transformers.StructTransformerOption{}, +// resultKeys: []string{"_cq_id"}, +// }, +// } +// for _, tc := range testTransforms { +// tables := []*schema.Table{ +// { +// Name: "test_table", +// Transform: transformers.TransformWithStruct( +// &testTable, tc.transformerOptions..., +// ), +// }, +// } + +// plugin := NewPlugin("testSourcePlugin", "1.0.0", tables, newTestExecutionClient) +// assert.Equal(t, tc.resultKeys, plugin.tables[0].PrimaryKeys()) +// } +// } diff --git a/plugin/plugin_round_robin_test.go b/plugin/plugin_round_robin_test.go index 9c4c094d6f..64b6472387 100644 --- a/plugin/plugin_round_robin_test.go +++ b/plugin/plugin_round_robin_test.go @@ -10,22 +10,33 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/array" pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v0" - "github.com/cloudquery/plugin-pb-go/specs" - "github.com/cloudquery/plugin-sdk/v3/schema" + "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/rs/zerolog" ) type testPluginClient struct { - memoryDB map[string][]arrow.Record - tables map[string]*schema.Table - memoryDBLock sync.RWMutex + memoryDB map[string][]arrow.Record + tables map[string]*schema.Table + spec pbPlugin.Spec + memoryDBLock sync.RWMutex } type testPluginSpec struct { ConnectionString string `json:"connection_string"` } -func (c *testPluginClient) Sync(ctx context.Context, metrics *Metrics, res chan<- *schema.Resource) error { +func (c *testPluginClient) ID() string { + return "test-plugin" +} + +func (c *testPluginClient) Sync(ctx context.Context, metrics *Metrics, res chan<- arrow.Record) error { + c.memoryDBLock.RLock() + for tableName := range c.memoryDB { + for _, row := range c.memoryDB[tableName] { + res <- row + } + } + c.memoryDBLock.RUnlock() return nil } @@ -48,7 +59,6 @@ func (c *testPluginClient) Migrate(ctx context.Context, tables schema.Tables) er c.tables[tableName] = table } return nil - return nil } func (c *testPluginClient) Write(ctx context.Context, tables schema.Tables, resources <-chan arrow.Record) error { @@ -60,7 +70,7 @@ func (c *testPluginClient) Write(ctx context.Context, tables schema.Tables, reso return fmt.Errorf("table name not found in schema metadata") } table := c.tables[tableName] - if c.spec.WriteMode == specs.WriteModeAppend { + if c.spec.WriteSpec.WriteMode == pbPlugin.WRITE_MODE_WRITE_MODE_APPEND { c.memoryDB[tableName] = append(c.memoryDB[tableName], resource) } else { c.overwrite(table, resource) @@ -108,6 +118,14 @@ func (c *testPluginClient) deleteStaleTable(_ context.Context, table *schema.Tab } func (c *testPluginClient) DeleteStale(ctx context.Context, tables schema.Tables, sourceName string, syncTime time.Time) error { + for _, table := range tables { + c.deleteStaleTable(ctx, table, sourceName, syncTime) + } + return nil +} + +func (c *testPluginClient) Close(ctx context.Context) error { + c.memoryDB = nil return nil } @@ -136,13 +154,77 @@ func (c *testPluginClient) Read(ctx context.Context, table *schema.Table, source return nil } -func NewTestPluginClient(context.Context, zerolog.Logger, pbPlugin.Spec) (Client, error) { +func NewTestPluginClient(ctx context.Context, logger zerolog.Logger, spec pbPlugin.Spec) (Client, error) { return &testPluginClient{ memoryDB: make(map[string][]arrow.Record), tables: make(map[string]*schema.Table), + spec: spec, }, nil } func TestPluginRoundRobin(t *testing.T) { - p := NewPlugin("test", "v0.0.0", NewTestPluginClient) + ctx := context.Background() + p := NewPlugin("test", "v0.0.0", NewTestPluginClient, WithUnmanaged()) + testTable := schema.TestTable("test_table", schema.TestSourceOptions{}) + syncTime := time.Now().UTC() + testRecords := schema.GenTestData(testTable, schema.GenTestDataOptions{ + SourceName: "test", + SyncTime: syncTime, + MaxRows: 1, + }) + spec := pbPlugin.Spec{ + Name: "test", + Path: "cloudquery/test", + Version: "v1.0.0", + Registry: pbPlugin.Spec_REGISTRY_GITHUB, + WriteSpec: &pbPlugin.WriteSpec{}, + SyncSpec: &pbPlugin.SyncSpec{}, + } + if err := p.Init(ctx, spec); err != nil { + t.Fatal(err) + } + + if err := p.Migrate(ctx, schema.Tables{testTable}); err != nil { + t.Fatal(err) + } + if err := p.writeAll(ctx, spec, syncTime, testRecords); err != nil { + t.Fatal(err) + } + gotRecords, err := p.readAll(ctx, testTable, "test") + if err != nil { + t.Fatal(err) + } + if len(gotRecords) != len(testRecords) { + t.Fatalf("got %d records, want %d", len(gotRecords), len(testRecords)) + } + if !array.RecordEqual(testRecords[0], gotRecords[0]) { + t.Fatal("records are not equal") + } + records, err := p.syncAll(ctx, syncTime, *spec.SyncSpec) + if err != nil { + t.Fatal(err) + } + if len(records) != 1 { + t.Fatalf("got %d resources, want 1", len(records)) + } + + if !array.RecordEqual(testRecords[0], records[0]) { + t.Fatal("records are not equal") + } + + newSyncTime := time.Now().UTC() + if err := p.DeleteStale(ctx, schema.Tables{testTable}, "test", newSyncTime); err != nil { + t.Fatal(err) + } + records, err = p.syncAll(ctx, syncTime, *spec.SyncSpec) + if err != nil { + t.Fatal(err) + } + if len(records) != 0 { + t.Fatalf("got %d resources, want 0", len(records)) + } + + if err := p.Close(ctx); err != nil { + t.Fatal(err) + } } \ No newline at end of file diff --git a/plugin/plugin_test.go b/plugin/plugin_test.go deleted file mode 100644 index 16afc7338c..0000000000 --- a/plugin/plugin_test.go +++ /dev/null @@ -1,470 +0,0 @@ -package plugin - -import ( - "context" - "testing" - "time" - - "github.com/apache/arrow/go/v13/arrow" - "github.com/cloudquery/plugin-pb-go/specs" - "github.com/cloudquery/plugin-sdk/v3/scalar" - "github.com/cloudquery/plugin-sdk/v3/schema" - "github.com/cloudquery/plugin-sdk/v3/transformers" - "github.com/google/go-cmp/cmp" - "github.com/google/uuid" - "github.com/rs/zerolog" - "github.com/stretchr/testify/assert" - "golang.org/x/sync/errgroup" -) - -type testExecutionClient struct{} - -var _ schema.ClientMeta = &testExecutionClient{} - -var deterministicStableUUID = uuid.MustParse("c25355aab52c5b70a4e0c9991f5a3b87") -var randomStableUUID = uuid.MustParse("00000000000040008000000000000000") - -var testSyncTime = time.Now() - -func testResolverSuccess(_ context.Context, _ schema.ClientMeta, _ *schema.Resource, res chan<- any) error { - res <- map[string]any{ - "TestColumn": 3, - } - return nil -} - -func testResolverPanic(context.Context, schema.ClientMeta, *schema.Resource, chan<- any) error { - panic("Resolver") -} - -func testPreResourceResolverPanic(context.Context, schema.ClientMeta, *schema.Resource) error { - panic("PreResourceResolver") -} - -func testColumnResolverPanic(context.Context, schema.ClientMeta, *schema.Resource, schema.Column) error { - panic("ColumnResolver") -} - -func testTableSuccess() *schema.Table { - return &schema.Table{ - Name: "test_table_success", - Resolver: testResolverSuccess, - Columns: []schema.Column{ - { - Name: "test_column", - Type: arrow.PrimitiveTypes.Int64, - }, - }, - } -} - -func testTableSuccessWithPK() *schema.Table { - return &schema.Table{ - Name: "test_table_success", - Resolver: testResolverSuccess, - Columns: []schema.Column{ - { - Name: "test_column", - Type: arrow.PrimitiveTypes.Int64, - PrimaryKey: true, - }, - }, - } -} - -func testTableResolverPanic() *schema.Table { - return &schema.Table{ - Name: "test_table_resolver_panic", - Resolver: testResolverPanic, - Columns: []schema.Column{ - { - Name: "test_column", - Type: arrow.PrimitiveTypes.Int64, - }, - }, - } -} - -func testTablePreResourceResolverPanic() *schema.Table { - return &schema.Table{ - Name: "test_table_pre_resource_resolver_panic", - PreResourceResolver: testPreResourceResolverPanic, - Resolver: testResolverSuccess, - Columns: []schema.Column{ - { - Name: "test_column", - Type: arrow.PrimitiveTypes.Int64, - }, - }, - } -} - -func testTableColumnResolverPanic() *schema.Table { - return &schema.Table{ - Name: "test_table_column_resolver_panic", - Resolver: testResolverSuccess, - Columns: []schema.Column{ - { - Name: "test_column", - Type: arrow.PrimitiveTypes.Int64, - }, - { - Name: "test_column1", - Type: arrow.PrimitiveTypes.Int64, - Resolver: testColumnResolverPanic, - }, - }, - } -} - -func testTableRelationSuccess() *schema.Table { - return &schema.Table{ - Name: "test_table_relation_success", - Resolver: testResolverSuccess, - Columns: []schema.Column{ - { - Name: "test_column", - Type: arrow.PrimitiveTypes.Int64, - }, - }, - Relations: []*schema.Table{ - testTableSuccess(), - }, - } -} - -func (*testExecutionClient) ID() string { - return "testExecutionClient" -} - -func newTestExecutionClient(context.Context, zerolog.Logger, specs.Source, Options) (schema.ClientMeta, error) { - return &testExecutionClient{}, nil -} - -type syncTestCase struct { - table *schema.Table - stats Metrics - data []scalar.Vector - deterministicCQID bool -} - -var syncTestCases = []syncTestCase{ - { - table: testTableSuccess(), - stats: Metrics{ - TableClient: map[string]map[string]*TableClientMetrics{ - "test_table_success": { - "testExecutionClient": { - Resources: 1, - }, - }, - }, - }, - data: []scalar.Vector{ - { - &scalar.String{Value: "testSource", Valid: true}, - &scalar.Timestamp{Value: testSyncTime, Valid: true}, - &scalar.UUID{Value: randomStableUUID, Valid: true}, - &scalar.UUID{}, - &scalar.Int64{Value: 3, Valid: true}, - }, - }, - }, - { - table: testTableResolverPanic(), - stats: Metrics{ - TableClient: map[string]map[string]*TableClientMetrics{ - "test_table_resolver_panic": { - "testExecutionClient": { - Panics: 1, - }, - }, - }, - }, - data: nil, - }, - { - table: testTablePreResourceResolverPanic(), - stats: Metrics{ - TableClient: map[string]map[string]*TableClientMetrics{ - "test_table_pre_resource_resolver_panic": { - "testExecutionClient": { - Panics: 1, - }, - }, - }, - }, - data: nil, - }, - - { - table: testTableRelationSuccess(), - stats: Metrics{ - TableClient: map[string]map[string]*TableClientMetrics{ - "test_table_relation_success": { - "testExecutionClient": { - Resources: 1, - }, - }, - "test_table_success": { - "testExecutionClient": { - Resources: 1, - }, - }, - }, - }, - data: []scalar.Vector{ - { - &scalar.String{Value: "testSource", Valid: true}, - &scalar.Timestamp{Value: testSyncTime, Valid: true}, - &scalar.UUID{Value: randomStableUUID, Valid: true}, - &scalar.UUID{}, - &scalar.Int64{Value: 3, Valid: true}, - }, - { - &scalar.String{Value: "testSource", Valid: true}, - &scalar.Timestamp{Value: testSyncTime, Valid: true}, - &scalar.UUID{Value: randomStableUUID, Valid: true}, - &scalar.UUID{Value: randomStableUUID, Valid: true}, - &scalar.Int64{Value: 3, Valid: true}, - }, - }, - }, - { - table: testTableSuccess(), - stats: Metrics{ - TableClient: map[string]map[string]*TableClientMetrics{ - "test_table_success": { - "testExecutionClient": { - Resources: 1, - }, - }, - }, - }, - data: []scalar.Vector{ - { - &scalar.String{Value: "testSource", Valid: true}, - &scalar.Timestamp{Value: testSyncTime, Valid: true}, - &scalar.UUID{Value: randomStableUUID, Valid: true}, - &scalar.UUID{}, - &scalar.Int64{Value: 3, Valid: true}, - }, - }, - deterministicCQID: true, - }, - { - table: testTableColumnResolverPanic(), - stats: Metrics{ - TableClient: map[string]map[string]*TableClientMetrics{ - "test_table_column_resolver_panic": { - "testExecutionClient": { - Panics: 1, - Resources: 1, - }, - }, - }, - }, - data: []scalar.Vector{ - { - &scalar.String{Value: "testSource", Valid: true}, - &scalar.Timestamp{Value: testSyncTime, Valid: true}, - &scalar.UUID{Value: randomStableUUID, Valid: true}, - &scalar.UUID{}, - &scalar.Int64{Value: 3, Valid: true}, - &scalar.Int64{}, - }, - }, - deterministicCQID: true, - }, - { - table: testTableRelationSuccess(), - stats: Metrics{ - TableClient: map[string]map[string]*TableClientMetrics{ - "test_table_relation_success": { - "testExecutionClient": { - Resources: 1, - }, - }, - "test_table_success": { - "testExecutionClient": { - Resources: 1, - }, - }, - }, - }, - data: []scalar.Vector{ - { - &scalar.String{Value: "testSource", Valid: true}, - &scalar.Timestamp{Value: testSyncTime, Valid: true}, - &scalar.UUID{Value: randomStableUUID, Valid: true}, - &scalar.UUID{}, - &scalar.Int64{Value: 3, Valid: true}, - }, - { - &scalar.String{Value: "testSource", Valid: true}, - &scalar.Timestamp{Value: testSyncTime, Valid: true}, - &scalar.UUID{Value: randomStableUUID, Valid: true}, - &scalar.UUID{Value: randomStableUUID, Valid: true}, - &scalar.Int64{Value: 3, Valid: true}, - }, - }, - deterministicCQID: true, - }, - { - table: testTableSuccessWithPK(), - stats: Metrics{ - TableClient: map[string]map[string]*TableClientMetrics{ - "test_table_success": { - "testExecutionClient": { - Resources: 1, - }, - }, - }, - }, - data: []scalar.Vector{ - { - &scalar.String{Value: "testSource", Valid: true}, - &scalar.Timestamp{Value: testSyncTime, Valid: true}, - &scalar.UUID{Value: deterministicStableUUID, Valid: true}, - &scalar.UUID{}, - &scalar.Int64{Value: 3, Valid: true}, - }, - }, - deterministicCQID: true, - }, -} - -type testRand struct{} - -func (testRand) Read(p []byte) (n int, err error) { - for i := range p { - p[i] = byte(0) - } - return len(p), nil -} - -func TestSync(t *testing.T) { - uuid.SetRand(testRand{}) - for _, scheduler := range specs.AllSchedulers { - for _, tc := range syncTestCases { - tc := tc - tc.table = tc.table.Copy(nil) - t.Run(tc.table.Name+"_"+scheduler.String(), func(t *testing.T) { - testSyncTable(t, tc, scheduler, tc.deterministicCQID) - }) - } - } -} - -func testSyncTable(t *testing.T, tc syncTestCase, scheduler specs.Scheduler, deterministicCQID bool) { - ctx := context.Background() - tables := []*schema.Table{ - tc.table, - } - - plugin := NewPlugin( - "testSourcePlugin", - "1.0.0", - tables, - newTestExecutionClient, - ) - plugin.SetLogger(zerolog.New(zerolog.NewTestWriter(t))) - spec := specs.Source{ - Name: "testSource", - Path: "cloudquery/testSource", - Tables: []string{"*"}, - Version: "v1.0.0", - Destinations: []string{"test"}, - Concurrency: 1, // choose a very low value to check that we don't run into deadlocks - Scheduler: scheduler, - DeterministicCQID: deterministicCQID, - } - if err := plugin.Init(ctx, spec); err != nil { - t.Fatal(err) - } - - resources := make(chan *schema.Resource) - g, ctx := errgroup.WithContext(ctx) - g.Go(func() error { - defer close(resources) - return plugin.Sync(ctx, - testSyncTime, - resources) - }) - - var i int - for resource := range resources { - if tc.data == nil { - t.Fatalf("Unexpected resource %v", resource) - } - if i >= len(tc.data) { - t.Fatalf("expected %d resources. got %d", len(tc.data), i) - } - if !resource.GetValues().Equal(tc.data[i]) { - t.Fatalf("expected at i=%d: %v. got %v", i, tc.data[i], resource.GetValues()) - } - i++ - } - if len(tc.data) != i { - t.Fatalf("expected %d resources. got %d", len(tc.data), i) - } - - stats := plugin.Metrics() - if !tc.stats.Equal(stats) { - t.Fatalf("unexpected stats: %v", cmp.Diff(tc.stats, stats)) - } - if err := g.Wait(); err != nil { - t.Fatal(err) - } -} - -func TestIgnoredColumns(t *testing.T) { - validateResources(t, schema.Resources{{ - Item: struct{ A *string }{}, - Table: &schema.Table{ - Columns: schema.ColumnList{ - { - Name: "a", - Type: arrow.BinaryTypes.String, - IgnoreInTests: true, - }, - }, - }, - }}) -} - -var testTable struct { - PrimaryKey string - SecondaryKey string - TertiaryKey string - Quaternary string -} - -func TestNewPluginPrimaryKeys(t *testing.T) { - testTransforms := []struct { - transformerOptions []transformers.StructTransformerOption - resultKeys []string - }{ - { - transformerOptions: []transformers.StructTransformerOption{transformers.WithPrimaryKeys("PrimaryKey")}, - resultKeys: []string{"primary_key"}, - }, - { - transformerOptions: []transformers.StructTransformerOption{}, - resultKeys: []string{"_cq_id"}, - }, - } - for _, tc := range testTransforms { - tables := []*schema.Table{ - { - Name: "test_table", - Transform: transformers.TransformWithStruct( - &testTable, tc.transformerOptions..., - ), - }, - } - - plugin := NewPlugin("testSourcePlugin", "1.0.0", tables, newTestExecutionClient) - assert.Equal(t, tc.resultKeys, plugin.tables[0].PrimaryKeys()) - } -} diff --git a/plugin/scheduler.go b/plugin/scheduler.go index 373147d194..c00ed2c8a9 100644 --- a/plugin/scheduler.go +++ b/plugin/scheduler.go @@ -9,7 +9,7 @@ import ( "sync/atomic" "time" - "github.com/cloudquery/plugin-sdk/v3/schema" + "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/getsentry/sentry-go" "github.com/rs/zerolog" "github.com/thoas/go-funk" diff --git a/plugin/scheduler_dfs.go b/plugin/scheduler_dfs.go index 9390966395..9b592be865 100644 --- a/plugin/scheduler_dfs.go +++ b/plugin/scheduler_dfs.go @@ -8,9 +8,9 @@ import ( "sync" "sync/atomic" - pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v0" - "github.com/cloudquery/plugin-sdk/v3/helpers" - "github.com/cloudquery/plugin-sdk/v3/schema" + pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" + "github.com/cloudquery/plugin-sdk/v4/helpers" + "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/getsentry/sentry-go" "golang.org/x/sync/semaphore" ) diff --git a/plugin/scheduler_round_robin.go b/plugin/scheduler_round_robin.go index 0554f5489e..5c6e90b391 100644 --- a/plugin/scheduler_round_robin.go +++ b/plugin/scheduler_round_robin.go @@ -4,8 +4,8 @@ import ( "context" "sync" - pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v0" - "github.com/cloudquery/plugin-sdk/v3/schema" + pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" + "github.com/cloudquery/plugin-sdk/v4/schema" "golang.org/x/sync/semaphore" ) diff --git a/plugin/scheduler_round_robin_test.go b/plugin/scheduler_round_robin_test.go index daf7cc242f..428b13c8a6 100644 --- a/plugin/scheduler_round_robin_test.go +++ b/plugin/scheduler_round_robin_test.go @@ -3,7 +3,7 @@ package plugin import ( "testing" - "github.com/cloudquery/plugin-sdk/v3/schema" + "github.com/cloudquery/plugin-sdk/v4/schema" ) func TestRoundRobinInterleave(t *testing.T) { diff --git a/plugins/destination/plugin_testing_overwrite_delete_stale.go b/plugin/testing_overwrite_deletestale.go similarity index 91% rename from plugins/destination/plugin_testing_overwrite_delete_stale.go rename to plugin/testing_overwrite_deletestale.go index 4339bb1d43..788decd8a4 100644 --- a/plugins/destination/plugin_testing_overwrite_delete_stale.go +++ b/plugin/testing_overwrite_deletestale.go @@ -1,20 +1,21 @@ -package destination +package plugin import ( "context" "fmt" "time" - "github.com/cloudquery/plugin-pb-go/specs" - "github.com/cloudquery/plugin-sdk/v3/schema" - "github.com/cloudquery/plugin-sdk/v3/types" + "github.com/apache/arrow/go/v13/arrow/array" + pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" + "github.com/cloudquery/plugin-sdk/v4/schema" + "github.com/cloudquery/plugin-sdk/v4/types" "github.com/google/uuid" "github.com/rs/zerolog" ) -func (*PluginTestSuite) destinationPluginTestWriteOverwriteDeleteStale(ctx context.Context, p *Plugin, logger zerolog.Logger, spec specs.Destination, testOpts PluginTestSuiteRunnerOptions) error { - spec.WriteMode = specs.WriteModeOverwriteDeleteStale - if err := p.Init(ctx, logger, spec); err != nil { +func (*PluginTestSuite) destinationPluginTestWriteOverwriteDeleteStale(ctx context.Context, p *Plugin, logger zerolog.Logger, spec pbPlugin.Spec, testOpts PluginTestSuiteRunnerOptions) error { + spec.WriteSpec.WriteMode = pbPlugin.WRITE_MODE_WRITE_MODE_OVERWRITE_DELETE_STALE + if err := p.Init(ctx, spec); err != nil { return fmt.Errorf("failed to init plugin: %w", err) } tableName := fmt.Sprintf("cq_%s_%d", spec.Name, time.Now().Unix()) @@ -31,9 +32,9 @@ func (*PluginTestSuite) destinationPluginTestWriteOverwriteDeleteStale(ctx conte } sourceName := "testOverwriteSource" + uuid.NewString() - sourceSpec := specs.Source{ + sourceSpec := pbPlugin.Spec{ Name: sourceName, - Backend: specs.BackendLocal, + // Backend: specs.BackendLocal, } opts := schema.GenTestDataOptions{ @@ -149,4 +150,4 @@ func (*PluginTestSuite) destinationPluginTestWriteOverwriteDeleteStale(ctx conte } return nil -} +} \ No newline at end of file diff --git a/plugin/testing.go b/plugin/testing_sync.go similarity index 73% rename from plugin/testing.go rename to plugin/testing_sync.go index 562da87461..0c0d6f939e 100644 --- a/plugin/testing.go +++ b/plugin/testing_sync.go @@ -2,11 +2,15 @@ package plugin import ( "context" + "fmt" + "strings" "testing" "time" - pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v0" - "github.com/cloudquery/plugin-sdk/v3/schema" + "github.com/apache/arrow/go/v13/arrow" + "github.com/apache/arrow/go/v13/arrow/array" + pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" + "github.com/cloudquery/plugin-sdk/v4/schema" ) type Validator func(t *testing.T, plugin *Plugin, resources []*schema.Resource) @@ -25,7 +29,7 @@ func TestPluginSync(t *testing.T, plugin *Plugin, spec pbPlugin.Spec, opts ...Te t.Parallel() } - resourcesChannel := make(chan *schema.Resource) + resourcesChannel := make(chan arrow.Record) var syncErr error if err := plugin.Init(context.Background(), spec); err != nil { @@ -37,16 +41,16 @@ func TestPluginSync(t *testing.T, plugin *Plugin, spec pbPlugin.Spec, opts ...Te syncErr = plugin.Sync(context.Background(), time.Now(), *spec.SyncSpec, resourcesChannel) }() - syncedResources := make([]*schema.Resource, 0) + syncedResources := make([]arrow.Record, 0) for resource := range resourcesChannel { syncedResources = append(syncedResources, resource) } if syncErr != nil { t.Fatal(syncErr) } - for _, validator := range o.validators { - validator(t, plugin, syncedResources) - } + // for _, validator := range o.validators { + // validator(t, plugin, syncedResources) + // } } type TestPluginOption func(*testPluginOptions) @@ -139,3 +143,27 @@ func validateResources(t *testing.T, resources []*schema.Resource) { } } } + +func RecordDiff(l arrow.Record, r arrow.Record) string { + var sb strings.Builder + if l.NumCols() != r.NumCols() { + return fmt.Sprintf("different number of columns: %d vs %d", l.NumCols(), r.NumCols()) + } + if l.NumRows() != r.NumRows() { + return fmt.Sprintf("different number of rows: %d vs %d", l.NumRows(), r.NumRows()) + } + for i := 0; i < int(l.NumCols()); i++ { + edits, err := array.Diff(l.Column(i), r.Column(i)) + if err != nil { + panic(fmt.Sprintf("left: %v, right: %v, error: %v", l.Column(i).DataType(), r.Column(i).DataType(), err)) + } + diff := edits.UnifiedDiff(l.Column(i), r.Column(i)) + if diff != "" { + sb.WriteString(l.Schema().Field(i).Name) + sb.WriteString(": ") + sb.WriteString(diff) + sb.WriteString("\n") + } + } + return sb.String() +} diff --git a/plugins/destination/plugin_testing.go b/plugin/testing_write.go similarity index 88% rename from plugins/destination/plugin_testing.go rename to plugin/testing_write.go index c3ee806aed..17fc3f6100 100644 --- a/plugins/destination/plugin_testing.go +++ b/plugin/testing_write.go @@ -1,4 +1,4 @@ -package destination +package plugin import ( "context" @@ -10,9 +10,9 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/array" - "github.com/cloudquery/plugin-pb-go/specs" - "github.com/cloudquery/plugin-sdk/v3/schema" - "github.com/cloudquery/plugin-sdk/v3/types" + pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" + "github.com/cloudquery/plugin-sdk/v4/schema" + "github.com/cloudquery/plugin-sdk/v4/types" "github.com/rs/zerolog" ) @@ -22,11 +22,11 @@ type PluginTestSuite struct { // MigrateStrategy defines which tests we should include type MigrateStrategy struct { - AddColumn specs.MigrateMode - AddColumnNotNull specs.MigrateMode - RemoveColumn specs.MigrateMode - RemoveColumnNotNull specs.MigrateMode - ChangeColumn specs.MigrateMode + AddColumn pbPlugin.WriteSpec_MIGRATE_MODE + AddColumnNotNull pbPlugin.WriteSpec_MIGRATE_MODE + RemoveColumn pbPlugin.WriteSpec_MIGRATE_MODE + RemoveColumnNotNull pbPlugin.WriteSpec_MIGRATE_MODE + ChangeColumn pbPlugin.WriteSpec_MIGRATE_MODE } type PluginTestSuiteTests struct { @@ -167,7 +167,7 @@ func WithTestSourceSkipDecimals() func(o *PluginTestSuiteRunnerOptions) { } } -func PluginTestSuiteRunner(t *testing.T, newPlugin NewPluginFunc, destSpec specs.Destination, tests PluginTestSuiteTests, testOptions ...func(o *PluginTestSuiteRunnerOptions)) { +func PluginTestSuiteRunner(t *testing.T, newPlugin NewPluginFunc, destSpec pbPlugin.Spec, tests PluginTestSuiteTests, testOptions ...func(o *PluginTestSuiteRunnerOptions)) { t.Helper() destSpec.Name = "testsuite" @@ -222,8 +222,8 @@ func PluginTestSuiteRunner(t *testing.T, newPlugin NewPluginFunc, destSpec specs if suite.tests.SkipMigrateOverwrite { t.Skip("skipping " + t.Name()) } - destSpec.WriteMode = specs.WriteModeOverwrite - destSpec.MigrateMode = specs.MigrateModeSafe + destSpec.WriteSpec.WriteMode = pbPlugin.WRITE_MODE_WRITE_MODE_OVERWRITE + destSpec.WriteSpec.MigrateMode = pbPlugin.WriteSpec_SAFE destSpec.Name = "test_migrate_overwrite" suite.destinationPluginTestMigrate(ctx, t, newPlugin, logger, destSpec, tests.MigrateStrategyOverwrite, opts) }) @@ -233,8 +233,8 @@ func PluginTestSuiteRunner(t *testing.T, newPlugin NewPluginFunc, destSpec specs if suite.tests.SkipMigrateOverwriteForce { t.Skip("skipping " + t.Name()) } - destSpec.WriteMode = specs.WriteModeOverwrite - destSpec.MigrateMode = specs.MigrateModeForced + destSpec.WriteSpec.WriteMode = pbPlugin.WRITE_MODE_WRITE_MODE_OVERWRITE + destSpec.WriteSpec.MigrateMode = pbPlugin.WriteSpec_FORCE destSpec.Name = "test_migrate_overwrite_force" suite.destinationPluginTestMigrate(ctx, t, newPlugin, logger, destSpec, tests.MigrateStrategyOverwrite, opts) }) @@ -259,8 +259,8 @@ func PluginTestSuiteRunner(t *testing.T, newPlugin NewPluginFunc, destSpec specs if suite.tests.SkipMigrateAppend { t.Skip("skipping " + t.Name()) } - destSpec.WriteMode = specs.WriteModeAppend - destSpec.MigrateMode = specs.MigrateModeSafe + destSpec.WriteSpec.WriteMode = pbPlugin.WRITE_MODE_WRITE_MODE_APPEND + destSpec.WriteSpec.MigrateMode = pbPlugin.WriteSpec_SAFE destSpec.Name = "test_migrate_append" suite.destinationPluginTestMigrate(ctx, t, newPlugin, logger, destSpec, tests.MigrateStrategyAppend, opts) }) @@ -270,8 +270,8 @@ func PluginTestSuiteRunner(t *testing.T, newPlugin NewPluginFunc, destSpec specs if suite.tests.SkipMigrateAppendForce { t.Skip("skipping " + t.Name()) } - destSpec.WriteMode = specs.WriteModeAppend - destSpec.MigrateMode = specs.MigrateModeForced + destSpec.WriteSpec.WriteMode = pbPlugin.WRITE_MODE_WRITE_MODE_APPEND + destSpec.WriteSpec.MigrateMode = pbPlugin.WriteSpec_FORCE destSpec.Name = "test_migrate_append_force" suite.destinationPluginTestMigrate(ctx, t, newPlugin, logger, destSpec, tests.MigrateStrategyAppend, opts) }) @@ -291,4 +291,4 @@ func sortRecordsBySyncTime(table *schema.Table, records []arrow.Record) { } return first.Before(second) }) -} +} \ No newline at end of file diff --git a/plugins/destination/plugin_testing_write_append.go b/plugin/testing_write_append.go similarity index 85% rename from plugins/destination/plugin_testing_write_append.go rename to plugin/testing_write_append.go index a3f0445c27..d56d20287e 100644 --- a/plugins/destination/plugin_testing_write_append.go +++ b/plugin/testing_write_append.go @@ -1,19 +1,20 @@ -package destination +package plugin import ( "context" "fmt" "time" - "github.com/cloudquery/plugin-pb-go/specs" - "github.com/cloudquery/plugin-sdk/v3/schema" + "github.com/apache/arrow/go/v13/arrow/array" + pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" + "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/google/uuid" "github.com/rs/zerolog" ) -func (s *PluginTestSuite) destinationPluginTestWriteAppend(ctx context.Context, p *Plugin, logger zerolog.Logger, spec specs.Destination, testOpts PluginTestSuiteRunnerOptions) error { - spec.WriteMode = specs.WriteModeAppend - if err := p.Init(ctx, logger, spec); err != nil { +func (s *PluginTestSuite) destinationPluginTestWriteAppend(ctx context.Context, p *Plugin, logger zerolog.Logger, spec pbPlugin.Spec, testOpts PluginTestSuiteRunnerOptions) error { + spec.WriteSpec.WriteMode = pbPlugin.WRITE_MODE_WRITE_MODE_APPEND + if err := p.Init(ctx, spec); err != nil { return fmt.Errorf("failed to init plugin: %w", err) } tableName := fmt.Sprintf("cq_%s_%d", spec.Name, time.Now().Unix()) @@ -27,7 +28,7 @@ func (s *PluginTestSuite) destinationPluginTestWriteAppend(ctx context.Context, } sourceName := "testAppendSource" + uuid.NewString() - specSource := specs.Source{ + specSource := pbPlugin.Spec{ Name: sourceName, } @@ -92,4 +93,4 @@ func (s *PluginTestSuite) destinationPluginTestWriteAppend(ctx context.Context, } return nil -} +} \ No newline at end of file diff --git a/plugins/destination/plugin_testing_migrate.go b/plugin/testing_write_migrate.go similarity index 85% rename from plugins/destination/plugin_testing_migrate.go rename to plugin/testing_write_migrate.go index b28ef18f50..d0c8b54ea8 100644 --- a/plugins/destination/plugin_testing_migrate.go +++ b/plugin/testing_write_migrate.go @@ -1,4 +1,4 @@ -package destination +package plugin import ( "context" @@ -8,9 +8,10 @@ import ( "time" "github.com/apache/arrow/go/v13/arrow" - "github.com/cloudquery/plugin-pb-go/specs" - "github.com/cloudquery/plugin-sdk/v3/schema" - "github.com/cloudquery/plugin-sdk/v3/types" + "github.com/apache/arrow/go/v13/arrow/array" + pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" + "github.com/cloudquery/plugin-sdk/v4/schema" + "github.com/cloudquery/plugin-sdk/v4/types" "github.com/google/uuid" "github.com/rs/zerolog" "github.com/stretchr/testify/require" @@ -20,8 +21,8 @@ func tableUUIDSuffix() string { return strings.ReplaceAll(uuid.NewString(), "-", "_") } -func testMigration(ctx context.Context, _ *testing.T, p *Plugin, logger zerolog.Logger, spec specs.Destination, target *schema.Table, source *schema.Table, mode specs.MigrateMode, testOpts PluginTestSuiteRunnerOptions) error { - if err := p.Init(ctx, logger, spec); err != nil { +func testMigration(ctx context.Context, _ *testing.T, p *Plugin, logger zerolog.Logger, spec pbPlugin.Spec, target *schema.Table, source *schema.Table, mode pbPlugin.WriteSpec_MIGRATE_MODE, testOpts PluginTestSuiteRunnerOptions) error { + if err := p.Init(ctx, spec); err != nil { return fmt.Errorf("failed to init plugin: %w", err) } @@ -30,7 +31,7 @@ func testMigration(ctx context.Context, _ *testing.T, p *Plugin, logger zerolog. } sourceName := target.Name - sourceSpec := specs.Source{ + sourceSpec := pbPlugin.Spec{ Name: sourceName, } syncTime := time.Now().UTC().Round(1 * time.Second) @@ -64,7 +65,7 @@ func testMigration(ctx context.Context, _ *testing.T, p *Plugin, logger zerolog. return fmt.Errorf("failed to read all: %w", err) } sortRecordsBySyncTime(target, resourcesRead) - if mode == specs.MigrateModeSafe { + if mode == pbPlugin.WriteSpec_SAFE { if len(resourcesRead) != 2 { return fmt.Errorf("expected 2 resources after write, got %d", len(resourcesRead)) } @@ -90,14 +91,14 @@ func (*PluginTestSuite) destinationPluginTestMigrate( t *testing.T, newPlugin NewPluginFunc, logger zerolog.Logger, - spec specs.Destination, + spec pbPlugin.Spec, strategy MigrateStrategy, testOpts PluginTestSuiteRunnerOptions, ) { - spec.BatchSize = 1 + spec.WriteSpec.BatchSize = 1 t.Run("add_column", func(t *testing.T) { - if strategy.AddColumn == specs.MigrateModeForced && spec.MigrateMode == specs.MigrateModeSafe { + if strategy.AddColumn == pbPlugin.WriteSpec_FORCE && spec.WriteSpec.MigrateMode == pbPlugin.WriteSpec_SAFE { t.Skip("skipping as migrate mode is safe") return } @@ -133,7 +134,7 @@ func (*PluginTestSuite) destinationPluginTestMigrate( }) t.Run("add_column_not_null", func(t *testing.T) { - if strategy.AddColumnNotNull == specs.MigrateModeForced && spec.MigrateMode == specs.MigrateModeSafe { + if strategy.AddColumnNotNull == pbPlugin.WriteSpec_FORCE && spec.WriteSpec.MigrateMode == pbPlugin.WriteSpec_SAFE { t.Skip("skipping as migrate mode is safe") return } @@ -167,7 +168,7 @@ func (*PluginTestSuite) destinationPluginTestMigrate( }) t.Run("remove_column", func(t *testing.T) { - if strategy.RemoveColumn == specs.MigrateModeForced && spec.MigrateMode == specs.MigrateModeSafe { + if strategy.RemoveColumn == pbPlugin.WriteSpec_FORCE && spec.WriteSpec.MigrateMode == pbPlugin.WriteSpec_SAFE { t.Skip("skipping as migrate mode is safe") return } @@ -200,7 +201,7 @@ func (*PluginTestSuite) destinationPluginTestMigrate( }) t.Run("remove_column_not_null", func(t *testing.T) { - if strategy.RemoveColumnNotNull == specs.MigrateModeForced && spec.MigrateMode == specs.MigrateModeSafe { + if strategy.RemoveColumnNotNull == pbPlugin.WriteSpec_FORCE && spec.WriteSpec.MigrateMode == pbPlugin.WriteSpec_SAFE { t.Skip("skipping as migrate mode is safe") return } @@ -234,7 +235,7 @@ func (*PluginTestSuite) destinationPluginTestMigrate( }) t.Run("change_column", func(t *testing.T) { - if strategy.ChangeColumn == specs.MigrateModeForced && spec.MigrateMode == specs.MigrateModeSafe { + if strategy.ChangeColumn == pbPlugin.WriteSpec_FORCE && spec.WriteSpec.MigrateMode == pbPlugin.WriteSpec_SAFE { t.Skip("skipping as migrate mode is safe") return } @@ -272,12 +273,12 @@ func (*PluginTestSuite) destinationPluginTestMigrate( table := schema.TestTable(tableName, testOpts.TestSourceOptions) p := newPlugin() - require.NoError(t, p.Init(ctx, logger, spec)) + require.NoError(t, p.Init(ctx, spec)) require.NoError(t, p.Migrate(ctx, schema.Tables{table})) nonForced := spec - nonForced.MigrateMode = specs.MigrateModeSafe - require.NoError(t, p.Init(ctx, logger, nonForced)) + nonForced.WriteSpec.MigrateMode = pbPlugin.WriteSpec_SAFE + require.NoError(t, p.Init(ctx, nonForced)) require.NoError(t, p.Migrate(ctx, schema.Tables{table})) }) -} +} \ No newline at end of file diff --git a/plugins/destination/plugin_testing_overwrite.go b/plugin/testing_write_overwrite.go similarity index 87% rename from plugins/destination/plugin_testing_overwrite.go rename to plugin/testing_write_overwrite.go index f77285ff63..a7dba53037 100644 --- a/plugins/destination/plugin_testing_overwrite.go +++ b/plugin/testing_write_overwrite.go @@ -1,20 +1,21 @@ -package destination +package plugin import ( "context" "fmt" "time" - "github.com/cloudquery/plugin-pb-go/specs" - "github.com/cloudquery/plugin-sdk/v3/schema" - "github.com/cloudquery/plugin-sdk/v3/types" + "github.com/apache/arrow/go/v13/arrow/array" + pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" + "github.com/cloudquery/plugin-sdk/v4/schema" + "github.com/cloudquery/plugin-sdk/v4/types" "github.com/google/uuid" "github.com/rs/zerolog" ) -func (*PluginTestSuite) destinationPluginTestWriteOverwrite(ctx context.Context, p *Plugin, logger zerolog.Logger, spec specs.Destination, testOpts PluginTestSuiteRunnerOptions) error { - spec.WriteMode = specs.WriteModeOverwrite - if err := p.Init(ctx, logger, spec); err != nil { +func (*PluginTestSuite) destinationPluginTestWriteOverwrite(ctx context.Context, p *Plugin, logger zerolog.Logger, spec pbPlugin.Spec, testOpts PluginTestSuiteRunnerOptions) error { + spec.WriteSpec.WriteMode = pbPlugin.WRITE_MODE_WRITE_MODE_OVERWRITE + if err := p.Init(ctx, spec); err != nil { return fmt.Errorf("failed to init plugin: %w", err) } tableName := fmt.Sprintf("cq_%s_%d", spec.Name, time.Now().Unix()) @@ -28,7 +29,7 @@ func (*PluginTestSuite) destinationPluginTestWriteOverwrite(ctx context.Context, } sourceName := "testOverwriteSource" + uuid.NewString() - sourceSpec := specs.Source{ + sourceSpec := pbPlugin.Spec{ Name: sourceName, } @@ -108,4 +109,4 @@ func (*PluginTestSuite) destinationPluginTestWriteOverwrite(ctx context.Context, } return nil -} +} \ No newline at end of file diff --git a/plugin/validate.go b/plugin/validate.go index 0b21133b05..6f557e9c1f 100644 --- a/plugin/validate.go +++ b/plugin/validate.go @@ -3,7 +3,7 @@ package plugin import ( "fmt" - "github.com/cloudquery/plugin-sdk/v3/schema" + "github.com/cloudquery/plugin-sdk/v4/schema" ) func (p *Plugin) validate(tables schema.Tables) error { diff --git a/plugins/destination/metrics.go b/plugins/destination/metrics.go deleted file mode 100644 index d00613ecf8..0000000000 --- a/plugins/destination/metrics.go +++ /dev/null @@ -1,8 +0,0 @@ -package destination - -type Metrics struct { - // Errors number of errors / failed writes - Errors uint64 - // Writes number of successful writes - Writes uint64 -} diff --git a/plugins/destination/plugin.go b/plugins/destination/plugin.go deleted file mode 100644 index 1d40f6af80..0000000000 --- a/plugins/destination/plugin.go +++ /dev/null @@ -1,314 +0,0 @@ -package destination - -import ( - "context" - "fmt" - "sync" - "time" - - "github.com/apache/arrow/go/v13/arrow" - "github.com/cloudquery/plugin-pb-go/specs" - "github.com/cloudquery/plugin-sdk/v3/schema" - "github.com/rs/zerolog" -) - -type writerType int - -const ( - unmanaged writerType = iota - managed -) - -const ( - defaultBatchTimeoutSeconds = 20 - defaultBatchSize = 10000 - defaultBatchSizeBytes = 5 * 1024 * 1024 // 5 MiB -) - -type NewClientFunc func(context.Context, zerolog.Logger, specs.Destination) (Client, error) - -type ManagedWriter interface { - WriteTableBatch(ctx context.Context, table *schema.Table, data []arrow.Record) error -} - -type UnimplementedManagedWriter struct{} - -var _ ManagedWriter = UnimplementedManagedWriter{} - -func (UnimplementedManagedWriter) WriteTableBatch(context.Context, *schema.Table, []arrow.Record) error { - panic("WriteTableBatch not implemented") -} - -type UnmanagedWriter interface { - Write(ctx context.Context, tables schema.Tables, res <-chan arrow.Record) error - Metrics() Metrics -} - -var _ UnmanagedWriter = UnimplementedUnmanagedWriter{} - -type UnimplementedUnmanagedWriter struct{} - -func (UnimplementedUnmanagedWriter) Write(context.Context, schema.Tables, <-chan arrow.Record) error { - panic("Write not implemented") -} - -func (UnimplementedUnmanagedWriter) Metrics() Metrics { - panic("Metrics not implemented") -} - -type Client interface { - Migrate(ctx context.Context, tables schema.Tables) error - Read(ctx context.Context, table *schema.Table, sourceName string, res chan<- arrow.Record) error - ManagedWriter - UnmanagedWriter - DeleteStale(ctx context.Context, tables schema.Tables, sourceName string, syncTime time.Time) error - Close(ctx context.Context) error -} - -type ClientResource struct { - TableName string - Data []any -} - -type Option func(*Plugin) - -type Plugin struct { - // Name of destination plugin i.e postgresql,snowflake - name string - // Version of the destination plugin - version string - // Called upon configure call to validate and init configuration - newClient NewClientFunc - writerType writerType - // initialized destination client - client Client - // spec the client was initialized with - spec specs.Destination - // Logger to call, this logger is passed to the serve.Serve Client, if not define Serve will create one instead. - logger zerolog.Logger - - // This is in use if the user passed a managed client - metrics map[string]*Metrics - metricsLock *sync.RWMutex - - workers map[string]*worker - workersLock *sync.Mutex - - batchTimeout time.Duration - defaultBatchSize int - defaultBatchSizeBytes int -} - -func WithManagedWriter() Option { - return func(p *Plugin) { - p.writerType = managed - } -} - -func WithBatchTimeout(seconds int) Option { - return func(p *Plugin) { - p.batchTimeout = time.Duration(seconds) * time.Second - } -} - -func WithDefaultBatchSize(defaultBatchSize int) Option { - return func(p *Plugin) { - p.defaultBatchSize = defaultBatchSize - } -} - -func WithDefaultBatchSizeBytes(defaultBatchSizeBytes int) Option { - return func(p *Plugin) { - p.defaultBatchSizeBytes = defaultBatchSizeBytes - } -} - -// NewPlugin creates a new destination plugin -func NewPlugin(name string, version string, newClientFunc NewClientFunc, opts ...Option) *Plugin { - p := &Plugin{ - name: name, - version: version, - newClient: newClientFunc, - metrics: make(map[string]*Metrics), - metricsLock: &sync.RWMutex{}, - workers: make(map[string]*worker), - workersLock: &sync.Mutex{}, - batchTimeout: time.Duration(defaultBatchTimeoutSeconds) * time.Second, - defaultBatchSize: defaultBatchSize, - defaultBatchSizeBytes: defaultBatchSizeBytes, - } - if newClientFunc == nil { - // we do this check because we only call this during runtime later on so it can fail - // before the server starts - panic("newClientFunc can't be nil") - } - for _, opt := range opts { - opt(p) - } - return p -} - -func (p *Plugin) Name() string { - return p.name -} - -func (p *Plugin) Version() string { - return p.version -} - -func (p *Plugin) Metrics() Metrics { - switch p.writerType { - case unmanaged: - return p.client.Metrics() - case managed: - metrics := Metrics{} - p.metricsLock.RLock() - for _, m := range p.metrics { - metrics.Errors += m.Errors - metrics.Writes += m.Writes - } - p.metricsLock.RUnlock() - return metrics - default: - panic("unknown client type") - } -} - -// we need lazy loading because we want to be able to initialize after -func (p *Plugin) Init(ctx context.Context, logger zerolog.Logger, spec specs.Destination) error { - var err error - p.logger = logger - p.spec = spec - p.spec.SetDefaults(p.defaultBatchSize, p.defaultBatchSizeBytes) - p.client, err = p.newClient(ctx, logger, p.spec) - if err != nil { - return err - } - return nil -} - -// we implement all DestinationClient functions so we can hook into pre-post behavior -func (p *Plugin) Migrate(ctx context.Context, tables schema.Tables) error { - if err := checkDestinationColumns(tables); err != nil { - return err - } - return p.client.Migrate(ctx, tables) -} - -func (p *Plugin) readAll(ctx context.Context, table *schema.Table, sourceName string) ([]arrow.Record, error) { - var readErr error - ch := make(chan arrow.Record) - go func() { - defer close(ch) - readErr = p.Read(ctx, table, sourceName, ch) - }() - // nolint:prealloc - var resources []arrow.Record - for resource := range ch { - resources = append(resources, resource) - } - return resources, readErr -} - -func (p *Plugin) Read(ctx context.Context, table *schema.Table, sourceName string, res chan<- arrow.Record) error { - return p.client.Read(ctx, table, sourceName, res) -} - -// this function is currently used mostly for testing so it's not a public api -func (p *Plugin) writeOne(ctx context.Context, sourceSpec specs.Source, syncTime time.Time, resource arrow.Record) error { - resources := []arrow.Record{resource} - return p.writeAll(ctx, sourceSpec, syncTime, resources) -} - -// this function is currently used mostly for testing so it's not a public api -func (p *Plugin) writeAll(ctx context.Context, sourceSpec specs.Source, syncTime time.Time, resources []arrow.Record) error { - ch := make(chan arrow.Record, len(resources)) - for _, resource := range resources { - ch <- resource - } - close(ch) - tables := make(schema.Tables, 0) - tableNames := make(map[string]struct{}) - for _, resource := range resources { - sc := resource.Schema() - tableMD := sc.Metadata() - name, found := tableMD.GetValue(schema.MetadataTableName) - if !found { - return fmt.Errorf("missing table name") - } - if _, ok := tableNames[name]; ok { - continue - } - table, err := schema.NewTableFromArrowSchema(resource.Schema()) - if err != nil { - return err - } - tables = append(tables, table) - tableNames[table.Name] = struct{}{} - } - return p.Write(ctx, sourceSpec, tables, syncTime, ch) -} - -func (p *Plugin) Write(ctx context.Context, sourceSpec specs.Source, tables schema.Tables, syncTime time.Time, res <-chan arrow.Record) error { - syncTime = syncTime.UTC() - if err := checkDestinationColumns(tables); err != nil { - return err - } - switch p.writerType { - case unmanaged: - if err := p.writeUnmanaged(ctx, sourceSpec, tables, syncTime, res); err != nil { - return err - } - case managed: - if err := p.writeManagedTableBatch(ctx, sourceSpec, tables, syncTime, res); err != nil { - return err - } - default: - panic("unknown client type") - } - if p.spec.WriteMode == specs.WriteModeOverwriteDeleteStale { - tablesToDelete := tables - if sourceSpec.Backend != specs.BackendNone { - tablesToDelete = make(schema.Tables, 0, len(tables)) - for _, t := range tables { - if !t.IsIncremental { - tablesToDelete = append(tablesToDelete, t) - } - } - } - if err := p.DeleteStale(ctx, tablesToDelete, sourceSpec.Name, syncTime); err != nil { - return err - } - } - return nil -} - -func (p *Plugin) DeleteStale(ctx context.Context, tables schema.Tables, sourceName string, syncTime time.Time) error { - syncTime = syncTime.UTC() - return p.client.DeleteStale(ctx, tables, sourceName, syncTime) -} - -func (p *Plugin) Close(ctx context.Context) error { - return p.client.Close(ctx) -} - -func checkDestinationColumns(tables schema.Tables) error { - for _, table := range tables { - if table.Columns.Index(schema.CqSourceNameColumn.Name) == -1 { - return fmt.Errorf("table %s is missing column %s. please consider upgrading source plugin", table.Name, schema.CqSourceNameColumn.Name) - } - if table.Columns.Index(schema.CqSyncTimeColumn.Name) == -1 { - return fmt.Errorf("table %s is missing column %s. please consider upgrading source plugin", table.Name, schema.CqSourceNameColumn.Name) - } - column := table.Columns.Get(schema.CqIDColumn.Name) - if column != nil { - if !column.NotNull { - return fmt.Errorf("column %s.%s cannot be nullable. please consider upgrading source plugin", table.Name, schema.CqIDColumn.Name) - } - if !column.Unique { - return fmt.Errorf("column %s.%s must be unique. please consider upgrading source plugin", table.Name, schema.CqIDColumn.Name) - } - } - } - return nil -} diff --git a/plugins/destination/unmanaged_writer.go b/plugins/destination/unmanaged_writer.go deleted file mode 100644 index cdb3466b09..0000000000 --- a/plugins/destination/unmanaged_writer.go +++ /dev/null @@ -1,14 +0,0 @@ -package destination - -import ( - "context" - "time" - - "github.com/apache/arrow/go/v13/arrow" - "github.com/cloudquery/plugin-pb-go/specs" - "github.com/cloudquery/plugin-sdk/v3/schema" -) - -func (p *Plugin) writeUnmanaged(ctx context.Context, _ specs.Source, tables schema.Tables, _ time.Time, res <-chan arrow.Record) error { - return p.client.Write(ctx, tables, res) -} diff --git a/plugins/docs.go b/plugins/docs.go deleted file mode 100644 index 2e21a01945..0000000000 --- a/plugins/docs.go +++ /dev/null @@ -1,2 +0,0 @@ -// Package plugins defines APIs for source and destination plugins -package plugins diff --git a/plugins/source/benchmark_test.go b/plugins/source/benchmark_test.go deleted file mode 100644 index 71ccdc929d..0000000000 --- a/plugins/source/benchmark_test.go +++ /dev/null @@ -1,429 +0,0 @@ -package source - -import ( - "context" - "fmt" - "math/rand" - "sync" - "sync/atomic" - "testing" - "time" - - "github.com/apache/arrow/go/v13/arrow" - "github.com/cloudquery/plugin-pb-go/specs" - "github.com/cloudquery/plugin-sdk/v3/schema" - "github.com/rs/zerolog" - "golang.org/x/sync/errgroup" -) - -type BenchmarkScenario struct { - Client Client - Scheduler specs.Scheduler - Clients int - Tables int - ChildrenPerTable int - Columns int - ColumnResolvers int // number of columns with custom resolvers - ResourcesPerTable int - ResourcesPerPage int - NoPreResourceResolver bool - Concurrency uint64 -} - -func (s *BenchmarkScenario) SetDefaults() { - if s.Clients == 0 { - s.Clients = 1 - } - if s.Tables == 0 { - s.Tables = 1 - } - if s.Columns == 0 { - s.Columns = 10 - } - if s.ResourcesPerTable == 0 { - s.ResourcesPerTable = 100 - } - if s.ResourcesPerPage == 0 { - s.ResourcesPerPage = 10 - } -} - -type Client interface { - Call(clientID, tableName string) error -} - -type Benchmark struct { - *BenchmarkScenario - - b *testing.B - tables []*schema.Table - plugin *Plugin - - apiCalls atomic.Int64 -} - -func NewBenchmark(b *testing.B, scenario BenchmarkScenario) *Benchmark { - scenario.SetDefaults() - sb := &Benchmark{ - BenchmarkScenario: &scenario, - b: b, - tables: nil, - plugin: nil, - } - sb.setup(b) - return sb -} - -func (s *Benchmark) setup(b *testing.B) { - createResolvers := func(tableName string) (schema.TableResolver, schema.RowResolver, schema.ColumnResolver) { - tableResolver := func(ctx context.Context, meta schema.ClientMeta, parent *schema.Resource, res chan<- any) error { - total := 0 - for total < s.ResourcesPerTable { - s.simulateAPICall(meta.ID(), tableName) - num := min(s.ResourcesPerPage, s.ResourcesPerTable-total) - resources := make([]struct { - Column1 string - }, num) - for i := 0; i < num; i++ { - resources[i] = struct { - Column1 string - }{ - Column1: "test-column", - } - } - res <- resources - total += num - } - return nil - } - preResourceResolver := func(ctx context.Context, meta schema.ClientMeta, resource *schema.Resource) error { - s.simulateAPICall(meta.ID(), tableName) - resource.Item = struct { - Column1 string - }{ - Column1: "test-pre", - } - return nil - } - columnResolver := func(ctx context.Context, meta schema.ClientMeta, resource *schema.Resource, c schema.Column) error { - s.simulateAPICall(meta.ID(), tableName) - return resource.Set(c.Name, "test") - } - return tableResolver, preResourceResolver, columnResolver - } - - s.tables = make([]*schema.Table, s.Tables) - for i := 0; i < s.Tables; i++ { - tableResolver, preResourceResolver, columnResolver := createResolvers(fmt.Sprintf("table%d", i)) - columns := make([]schema.Column, s.Columns) - for u := 0; u < s.Columns; u++ { - columns[u] = schema.Column{ - Name: fmt.Sprintf("column%d", u), - Type: arrow.BinaryTypes.String, - } - if u < s.ColumnResolvers { - columns[u].Resolver = columnResolver - } - } - relations := make([]*schema.Table, s.ChildrenPerTable) - for u := 0; u < s.ChildrenPerTable; u++ { - relations[u] = &schema.Table{ - Name: fmt.Sprintf("table%d_child%d", i, u), - Columns: columns, - Resolver: tableResolver, - } - if !s.NoPreResourceResolver { - relations[u].PreResourceResolver = preResourceResolver - } - } - s.tables[i] = &schema.Table{ - Name: fmt.Sprintf("table%d", i), - Columns: columns, - Relations: relations, - Resolver: tableResolver, - Multiplex: nMultiplexer(s.Clients), - } - if !s.NoPreResourceResolver { - s.tables[i].PreResourceResolver = preResourceResolver - } - for u := range relations { - relations[u].Parent = s.tables[i] - } - } - - plugin := NewPlugin( - "testPlugin", - "1.0.0", - s.tables, - newTestExecutionClient, - ) - plugin.SetLogger(zerolog.New(zerolog.NewTestWriter(b)).Level(zerolog.WarnLevel)) - s.plugin = plugin - s.b = b -} - -func (s *Benchmark) simulateAPICall(clientID, tableName string) { - for { - s.apiCalls.Add(1) - err := s.Client.Call(clientID, tableName) - if err == nil { - // if no error, we are done - break - } - // if error, we have to retry - // we simulate a random backoff - time.Sleep(time.Duration(rand.Intn(100)) * time.Millisecond) - } -} - -func min(a, b int) int { - if a < b { - return a - } - return b -} - -func (s *Benchmark) Run() { - for n := 0; n < s.b.N; n++ { - s.b.StopTimer() - ctx := context.Background() - spec := specs.Source{ - Name: "testSource", - Path: "cloudquery/testSource", - Tables: []string{"*"}, - Version: "v1.0.0", - Destinations: []string{"test"}, - Concurrency: s.Concurrency, - Scheduler: s.Scheduler, - } - if err := s.plugin.Init(ctx, spec); err != nil { - s.b.Fatal(err) - } - resources := make(chan *schema.Resource) - g, ctx := errgroup.WithContext(ctx) - g.Go(func() error { - defer close(resources) - return s.plugin.Sync(ctx, - time.Now(), - resources) - }) - s.b.StartTimer() - start := time.Now() - - totalResources := 0 - for range resources { - // read resources channel until empty - totalResources++ - } - if err := g.Wait(); err != nil { - s.b.Fatal(err) - } - - end := time.Now() - s.b.ReportMetric(0, "ns/op") // drop default ns/op output - s.b.ReportMetric(float64(totalResources)/(end.Sub(start).Seconds()), "resources/s") - - // Enable the below metrics for more verbose information about the scenario: - // s.b.ReportMetric(float64(s.apiCalls.Load())/(end.Sub(start).Seconds()), "api-calls/s") - // s.b.ReportMetric(float64(totalResources), "resources") - // s.b.ReportMetric(float64(s.apiCalls.Load()), "apiCalls") - } -} - -type benchmarkClient struct { - num int -} - -func (b benchmarkClient) ID() string { - return fmt.Sprintf("client%d", b.num) -} - -func nMultiplexer(n int) schema.Multiplexer { - return func(meta schema.ClientMeta) []schema.ClientMeta { - clients := make([]schema.ClientMeta, n) - for i := 0; i < n; i++ { - clients[i] = benchmarkClient{ - num: i, - } - } - return clients - } -} - -func BenchmarkDefaultConcurrencyDFS(b *testing.B) { - benchmarkWithScheduler(b, specs.SchedulerDFS) -} - -func BenchmarkDefaultConcurrencyRoundRobin(b *testing.B) { - benchmarkWithScheduler(b, specs.SchedulerRoundRobin) -} - -func benchmarkWithScheduler(b *testing.B, scheduler specs.Scheduler) { - b.ReportAllocs() - minTime := 1 * time.Millisecond - mean := 10 * time.Millisecond - stdDev := 100 * time.Millisecond - client := NewDefaultClient(minTime, mean, stdDev) - bs := BenchmarkScenario{ - Client: client, - Clients: 25, - Tables: 5, - Columns: 10, - ColumnResolvers: 1, - ResourcesPerTable: 100, - ResourcesPerPage: 50, - Scheduler: scheduler, - } - sb := NewBenchmark(b, bs) - sb.Run() -} - -func BenchmarkTablesWithChildrenDFS(b *testing.B) { - benchmarkTablesWithChildrenScheduler(b, specs.SchedulerDFS) -} - -func BenchmarkTablesWithChildrenRoundRobin(b *testing.B) { - benchmarkTablesWithChildrenScheduler(b, specs.SchedulerRoundRobin) -} - -func benchmarkTablesWithChildrenScheduler(b *testing.B, scheduler specs.Scheduler) { - b.ReportAllocs() - minTime := 1 * time.Millisecond - mean := 10 * time.Millisecond - stdDev := 100 * time.Millisecond - client := NewDefaultClient(minTime, mean, stdDev) - bs := BenchmarkScenario{ - Client: client, - Clients: 2, - Tables: 2, - ChildrenPerTable: 2, - Columns: 10, - ColumnResolvers: 1, - ResourcesPerTable: 100, - ResourcesPerPage: 50, - Scheduler: scheduler, - } - sb := NewBenchmark(b, bs) - sb.Run() -} - -type DefaultClient struct { - min, stdDev, mean time.Duration -} - -func NewDefaultClient(min, mean, stdDev time.Duration) *DefaultClient { - if min == 0 { - min = time.Millisecond - } - if mean == 0 { - mean = 10 * time.Millisecond - } - if stdDev == 0 { - stdDev = 100 * time.Millisecond - } - return &DefaultClient{ - min: min, - mean: mean, - stdDev: stdDev, - } -} - -func (c *DefaultClient) Call(_, _ string) error { - sample := int(rand.NormFloat64()*float64(c.stdDev) + float64(c.mean)) - duration := time.Duration(sample) - if duration < c.min { - duration = c.min - } - time.Sleep(duration) - return nil -} - -type RateLimitClient struct { - *DefaultClient - calls map[string][]time.Time - callsLock sync.Mutex - window time.Duration - maxCallsPerWindow int -} - -func NewRateLimitClient(min, mean, stdDev time.Duration, maxCallsPerWindow int, window time.Duration) *RateLimitClient { - return &RateLimitClient{ - DefaultClient: NewDefaultClient(min, mean, stdDev), - calls: map[string][]time.Time{}, - window: window, - maxCallsPerWindow: maxCallsPerWindow, - } -} - -func (r *RateLimitClient) Call(clientID, table string) error { - // this will sleep for the appropriate amount of time before responding - err := r.DefaultClient.Call(clientID, table) - if err != nil { - return err - } - - r.callsLock.Lock() - defer r.callsLock.Unlock() - - // limit the number of calls per window by table - key := table - - // remove calls from outside the call window - updated := make([]time.Time, 0, len(r.calls[key])) - for i := range r.calls[key] { - if time.Since(r.calls[key][i]) < r.window { - updated = append(updated, r.calls[key][i]) - } - } - - // return error if we've exceeded the max calls in the time window - if len(updated) >= r.maxCallsPerWindow { - return fmt.Errorf("rate limit exceeded") - } - - r.calls[key] = append(r.calls[key], time.Now()) - return nil -} - -// BenchmarkDefaultConcurrency represents a benchmark scenario where rate limiting is applied -// by the cloud provider. In this rate limiter, the limit is applied globally per table. -// This mirrors the behavior of GCP, where rate limiting is applied per project *token*, not -// per project. A good scheduler should spread the load across tables so that other tables can make -// progress while waiting for the rate limit to reset. -func BenchmarkTablesWithRateLimitingDFS(b *testing.B) { - benchmarkTablesWithRateLimitingScheduler(b, specs.SchedulerDFS) -} - -func BenchmarkTablesWithRateLimitingRoundRobin(b *testing.B) { - benchmarkTablesWithRateLimitingScheduler(b, specs.SchedulerRoundRobin) -} - -// In this benchmark, we set up a scenario where each table has a global rate limit of 1 call per 100ms. -// Every table requires 1 call to resolve, and has 10 clients. This means, at best, each table can resolve in 1 second. -// We have 100 such tables and a concurrency that allows 1000 calls at a time. A good scheduler for this scenario -// should be able to resolve all tables in a bit more than 1 second. -func benchmarkTablesWithRateLimitingScheduler(b *testing.B, scheduler specs.Scheduler) { - b.ReportAllocs() - minTime := 1 * time.Millisecond - mean := 1 * time.Millisecond - stdDev := 1 * time.Millisecond - maxCallsPerWindow := 1 - window := 100 * time.Millisecond - c := NewRateLimitClient(minTime, mean, stdDev, maxCallsPerWindow, window) - - bs := BenchmarkScenario{ - Client: c, - Scheduler: scheduler, - Clients: 10, - Tables: 100, - ChildrenPerTable: 0, - Columns: 10, - ColumnResolvers: 0, - ResourcesPerTable: 1, - ResourcesPerPage: 1, - Concurrency: 1000, - NoPreResourceResolver: true, - } - sb := NewBenchmark(b, bs) - sb.Run() -} diff --git a/plugins/source/docs.go b/plugins/source/docs.go deleted file mode 100644 index f21d926856..0000000000 --- a/plugins/source/docs.go +++ /dev/null @@ -1,241 +0,0 @@ -package source - -import ( - "bytes" - "embed" - "encoding/json" - "fmt" - "os" - "path/filepath" - "regexp" - "sort" - "text/template" - - "github.com/cloudquery/plugin-sdk/v3/caser" - "github.com/cloudquery/plugin-sdk/v3/schema" -) - -//go:embed templates/*.go.tpl -var templatesFS embed.FS - -var reMatchNewlines = regexp.MustCompile(`\n{3,}`) -var reMatchHeaders = regexp.MustCompile(`(#{1,6}.+)\n+`) - -var DefaultTitleExceptions = map[string]string{ - // common abbreviations - "acl": "ACL", - "acls": "ACLs", - "api": "API", - "apis": "APIs", - "ca": "CA", - "cidr": "CIDR", - "cidrs": "CIDRs", - "db": "DB", - "dbs": "DBs", - "dhcp": "DHCP", - "iam": "IAM", - "iot": "IOT", - "ip": "IP", - "ips": "IPs", - "ipv4": "IPv4", - "ipv6": "IPv6", - "mfa": "MFA", - "ml": "ML", - "oauth": "OAuth", - "vpc": "VPC", - "vpcs": "VPCs", - "vpn": "VPN", - "vpns": "VPNs", - "waf": "WAF", - "wafs": "WAFs", - - // cloud providers - "aws": "AWS", - "gcp": "GCP", -} - -func DefaultTitleTransformer(table *schema.Table) string { - if table.Title != "" { - return table.Title - } - csr := caser.New(caser.WithCustomExceptions(DefaultTitleExceptions)) - return csr.ToTitle(table.Name) -} - -func sortTables(tables schema.Tables) { - sort.SliceStable(tables, func(i, j int) bool { - return tables[i].Name < tables[j].Name - }) - - for _, table := range tables { - sortTables(table.Relations) - } -} - -type templateData struct { - PluginName string - Tables schema.Tables -} - -// GeneratePluginDocs creates table documentation for the source plugin based on its list of tables -func (p *Plugin) GeneratePluginDocs(dir, format string) error { - if err := os.MkdirAll(dir, os.ModePerm); err != nil { - return err - } - - setDestinationManagedCqColumns(p.Tables()) - - sortedTables := make(schema.Tables, 0, len(p.Tables())) - for _, t := range p.Tables() { - sortedTables = append(sortedTables, t.Copy(nil)) - } - sortTables(sortedTables) - - switch format { - case "markdown": - return p.renderTablesAsMarkdown(dir, p.name, sortedTables) - case "json": - return p.renderTablesAsJSON(dir, sortedTables) - default: - return fmt.Errorf("unsupported format: %v", format) - } -} - -// setDestinationManagedCqColumns overwrites or adds the CQ columns that are managed by the destination plugins (_cq_sync_time, _cq_source_name). -func setDestinationManagedCqColumns(tables []*schema.Table) { - for _, table := range tables { - table.OverwriteOrAddColumn(&schema.CqSyncTimeColumn) - table.OverwriteOrAddColumn(&schema.CqSourceNameColumn) - setDestinationManagedCqColumns(table.Relations) - } -} - -type jsonTable struct { - Name string `json:"name"` - Title string `json:"title"` - Description string `json:"description"` - Columns []jsonColumn `json:"columns"` - Relations []jsonTable `json:"relations"` -} - -type jsonColumn struct { - Name string `json:"name"` - Type string `json:"type"` - IsPrimaryKey bool `json:"is_primary_key,omitempty"` - IsIncrementalKey bool `json:"is_incremental_key,omitempty"` -} - -func (p *Plugin) renderTablesAsJSON(dir string, tables schema.Tables) error { - jsonTables := p.jsonifyTables(tables) - buffer := &bytes.Buffer{} - m := json.NewEncoder(buffer) - m.SetIndent("", " ") - m.SetEscapeHTML(false) - err := m.Encode(jsonTables) - if err != nil { - return err - } - outputPath := filepath.Join(dir, "__tables.json") - return os.WriteFile(outputPath, buffer.Bytes(), 0644) -} - -func (p *Plugin) jsonifyTables(tables schema.Tables) []jsonTable { - jsonTables := make([]jsonTable, len(tables)) - for i, table := range tables { - jsonColumns := make([]jsonColumn, len(table.Columns)) - for c, col := range table.Columns { - jsonColumns[c] = jsonColumn{ - Name: col.Name, - Type: col.Type.String(), - IsPrimaryKey: col.PrimaryKey, - IsIncrementalKey: col.IncrementalKey, - } - } - jsonTables[i] = jsonTable{ - Name: table.Name, - Title: p.titleTransformer(table), - Description: table.Description, - Columns: jsonColumns, - Relations: p.jsonifyTables(table.Relations), - } - } - return jsonTables -} - -func (p *Plugin) renderTablesAsMarkdown(dir string, pluginName string, tables schema.Tables) error { - for _, table := range tables { - if err := p.renderAllTables(table, dir); err != nil { - return err - } - } - t, err := template.New("all_tables.md.go.tpl").Funcs(template.FuncMap{ - "indentToDepth": indentToDepth, - }).ParseFS(templatesFS, "templates/all_tables*.md.go.tpl") - if err != nil { - return fmt.Errorf("failed to parse template for README.md: %v", err) - } - - var b bytes.Buffer - if err := t.Execute(&b, templateData{PluginName: pluginName, Tables: tables}); err != nil { - return fmt.Errorf("failed to execute template: %v", err) - } - content := formatMarkdown(b.String()) - outputPath := filepath.Join(dir, "README.md") - f, err := os.Create(outputPath) - if err != nil { - return fmt.Errorf("failed to create file %v: %v", outputPath, err) - } - f.WriteString(content) - return nil -} - -func (p *Plugin) renderAllTables(t *schema.Table, dir string) error { - if err := p.renderTable(t, dir); err != nil { - return err - } - for _, r := range t.Relations { - if err := p.renderAllTables(r, dir); err != nil { - return err - } - } - return nil -} - -func (p *Plugin) renderTable(table *schema.Table, dir string) error { - t := template.New("").Funcs(map[string]any{ - "title": p.titleTransformer, - }) - t, err := t.New("table.md.go.tpl").ParseFS(templatesFS, "templates/table.md.go.tpl") - if err != nil { - return fmt.Errorf("failed to parse template: %v", err) - } - - outputPath := filepath.Join(dir, fmt.Sprintf("%s.md", table.Name)) - - var b bytes.Buffer - if err := t.Execute(&b, table); err != nil { - return fmt.Errorf("failed to execute template: %v", err) - } - content := formatMarkdown(b.String()) - f, err := os.Create(outputPath) - if err != nil { - return fmt.Errorf("failed to create file %v: %v", outputPath, err) - } - f.WriteString(content) - return f.Close() -} - -func formatMarkdown(s string) string { - s = reMatchNewlines.ReplaceAllString(s, "\n\n") - return reMatchHeaders.ReplaceAllString(s, `$1`+"\n\n") -} - -func indentToDepth(table *schema.Table) string { - s := "" - t := table - for t.Parent != nil { - s += " " - t = t.Parent - } - return s -} diff --git a/plugins/source/docs_test.go b/plugins/source/docs_test.go deleted file mode 100644 index 30d34814d3..0000000000 --- a/plugins/source/docs_test.go +++ /dev/null @@ -1,164 +0,0 @@ -//go:build !windows - -package source - -import ( - "os" - "path" - "testing" - - "github.com/apache/arrow/go/v13/arrow" - "github.com/bradleyjkemp/cupaloy/v2" - "github.com/cloudquery/plugin-sdk/v3/schema" - "github.com/cloudquery/plugin-sdk/v3/types" - "github.com/stretchr/testify/require" -) - -var testTables = []*schema.Table{ - { - Name: "test_table", - Description: "Description for test table", - Columns: []schema.Column{ - { - Name: "int_col", - Type: arrow.PrimitiveTypes.Int64, - }, - { - Name: "id_col", - Type: arrow.PrimitiveTypes.Int64, - PrimaryKey: true, - }, - { - Name: "id_col2", - Type: arrow.PrimitiveTypes.Int64, - PrimaryKey: true, - }, - { - Name: "json_col", - Type: types.ExtensionTypes.JSON, - }, - { - Name: "list_col", - Type: arrow.ListOf(arrow.PrimitiveTypes.Int64), - }, - { - Name: "map_col", - Type: arrow.MapOf(arrow.BinaryTypes.String, arrow.PrimitiveTypes.Int64), - }, - { - Name: "struct_col", - Type: arrow.StructOf(arrow.Field{Name: "string_field", Type: arrow.BinaryTypes.String}, arrow.Field{Name: "int_field", Type: arrow.PrimitiveTypes.Int64}), - }, - }, - Relations: []*schema.Table{ - { - Name: "relation_table", - Description: "Description for relational table", - Columns: []schema.Column{ - { - Name: "string_col", - Type: arrow.BinaryTypes.String, - }, - }, - Relations: []*schema.Table{ - { - Name: "relation_relation_table_b", - Description: "Description for relational table's relation", - Columns: []schema.Column{ - { - Name: "string_col", - Type: arrow.BinaryTypes.String, - }, - }, - }, - { - Name: "relation_relation_table_a", - Description: "Description for relational table's relation", - Columns: []schema.Column{ - { - Name: "string_col", - Type: arrow.BinaryTypes.String, - }, - }, - }, - }, - }, - { - Name: "relation_table2", - Description: "Description for second relational table", - Columns: []schema.Column{ - { - Name: "string_col", - Type: arrow.BinaryTypes.String, - }, - }, - }, - }, - }, - { - Name: "incremental_table", - Description: "Description for incremental table", - IsIncremental: true, - Columns: []schema.Column{ - { - Name: "int_col", - Type: arrow.PrimitiveTypes.Int64, - }, - { - Name: "id_col", - Type: arrow.PrimitiveTypes.Int64, - PrimaryKey: true, - IncrementalKey: true, - }, - { - Name: "id_col2", - Type: arrow.PrimitiveTypes.Int64, - IncrementalKey: true, - }, - }, - }, -} - -func TestGeneratePluginDocs(t *testing.T) { - p := NewPlugin("test", "v1.0.0", testTables, newTestExecutionClient) - - cup := cupaloy.New(cupaloy.SnapshotSubdirectory("testdata")) - - t.Run("Markdown", func(t *testing.T) { - tmpdir := t.TempDir() - - err := p.GeneratePluginDocs(tmpdir, "markdown") - if err != nil { - t.Fatalf("unexpected error calling GeneratePluginDocs: %v", err) - } - - expectFiles := []string{"test_table.md", "relation_table.md", "relation_relation_table_a.md", "relation_relation_table_b.md", "incremental_table.md", "README.md"} - for _, exp := range expectFiles { - t.Run(exp, func(t *testing.T) { - output := path.Join(tmpdir, exp) - got, err := os.ReadFile(output) - require.NoError(t, err) - cup.SnapshotT(t, got) - }) - } - }) - - t.Run("JSON", func(t *testing.T) { - tmpdir := t.TempDir() - - err := p.GeneratePluginDocs(tmpdir, "json") - if err != nil { - t.Fatalf("unexpected error calling GeneratePluginDocs: %v", err) - } - - expectFiles := []string{"__tables.json"} - for _, exp := range expectFiles { - t.Run(exp, func(t *testing.T) { - output := path.Join(tmpdir, exp) - got, err := os.ReadFile(output) - require.NoError(t, err) - cup.SnapshotT(t, got) - }) - } - }) -} diff --git a/plugins/source/metrics.go b/plugins/source/metrics.go deleted file mode 100644 index 9975933779..0000000000 --- a/plugins/source/metrics.go +++ /dev/null @@ -1,207 +0,0 @@ -package source - -import ( - "sync" - "sync/atomic" - "time" - - "github.com/cloudquery/plugin-sdk/v3/schema" - "golang.org/x/exp/slices" -) - -type Metrics struct { - TableClient map[string]map[string]*TableClientMetrics -} - -type TableClientMetrics struct { - // These should only be accessed with 'Atomic*' methods. - Resources uint64 - Errors uint64 - Panics uint64 - - // These accesses must be protected by the mutex. - startTime time.Time - endTime time.Time - mutex sync.Mutex -} - -func (s *TableClientMetrics) Equal(other *TableClientMetrics) bool { - return s.Resources == other.Resources && s.Errors == other.Errors && s.Panics == other.Panics -} - -// Equal compares to stats. Mostly useful in testing -func (s *Metrics) Equal(other *Metrics) bool { - for table, clientStats := range s.TableClient { - for client, stats := range clientStats { - if _, ok := other.TableClient[table]; !ok { - return false - } - if _, ok := other.TableClient[table][client]; !ok { - return false - } - if !stats.Equal(other.TableClient[table][client]) { - return false - } - } - } - for table, clientStats := range other.TableClient { - for client, stats := range clientStats { - if _, ok := s.TableClient[table]; !ok { - return false - } - if _, ok := s.TableClient[table][client]; !ok { - return false - } - if !stats.Equal(s.TableClient[table][client]) { - return false - } - } - } - return true -} - -func (s *Metrics) initWithClients(table *schema.Table, clients []schema.ClientMeta) { - s.TableClient[table.Name] = make(map[string]*TableClientMetrics, len(clients)) - for _, client := range clients { - s.TableClient[table.Name][client.ID()] = &TableClientMetrics{} - } - for _, relation := range table.Relations { - s.initWithClients(relation, clients) - } -} - -func (s *Metrics) TotalErrors() uint64 { - var total uint64 - for _, clientMetrics := range s.TableClient { - for _, metrics := range clientMetrics { - total += metrics.Errors - } - } - return total -} - -func (s *Metrics) TotalErrorsAtomic() uint64 { - var total uint64 - for _, clientMetrics := range s.TableClient { - for _, metrics := range clientMetrics { - total += atomic.LoadUint64(&metrics.Errors) - } - } - return total -} - -func (s *Metrics) TotalPanics() uint64 { - var total uint64 - for _, clientMetrics := range s.TableClient { - for _, metrics := range clientMetrics { - total += metrics.Panics - } - } - return total -} - -func (s *Metrics) TotalPanicsAtomic() uint64 { - var total uint64 - for _, clientMetrics := range s.TableClient { - for _, metrics := range clientMetrics { - total += atomic.LoadUint64(&metrics.Panics) - } - } - return total -} - -func (s *Metrics) TotalResources() uint64 { - var total uint64 - for _, clientMetrics := range s.TableClient { - for _, metrics := range clientMetrics { - total += metrics.Resources - } - } - return total -} - -func (s *Metrics) TotalResourcesAtomic() uint64 { - var total uint64 - for _, clientMetrics := range s.TableClient { - for _, metrics := range clientMetrics { - total += atomic.LoadUint64(&metrics.Resources) - } - } - return total -} - -func (s *Metrics) MarkStart(table *schema.Table, clientID string) { - now := time.Now() - - s.TableClient[table.Name][clientID].mutex.Lock() - defer s.TableClient[table.Name][clientID].mutex.Unlock() - s.TableClient[table.Name][clientID].startTime = now -} - -// if the table is a top-level table, we need to mark all of its descendents as 'done' as well. -// This is because, when a top-level table is empty (no resources), its descendants are never actually -// synced. -func (s *Metrics) MarkEnd(table *schema.Table, clientID string) { - now := time.Now() - - if table.Parent == nil { - s.markEndRecursive(table, clientID, now) - return - } - - s.TableClient[table.Name][clientID].mutex.Lock() - defer s.TableClient[table.Name][clientID].mutex.Unlock() - s.TableClient[table.Name][clientID].endTime = now -} - -func (s *Metrics) markEndRecursive(table *schema.Table, clientID string, now time.Time) { - // We don't use defer with Unlock(), because we want to unlock the mutex as soon as possible. - s.TableClient[table.Name][clientID].mutex.Lock() - s.TableClient[table.Name][clientID].endTime = now - s.TableClient[table.Name][clientID].mutex.Unlock() - - for _, relation := range table.Relations { - s.markEndRecursive(relation, clientID, now) - } -} - -func (s *Metrics) InProgressTables() []string { - var inProgressTables []string - - for table, tableMetrics := range s.TableClient { - for _, clientMetrics := range tableMetrics { - clientMetrics.mutex.Lock() - endTime := clientMetrics.endTime - startTime := clientMetrics.startTime - clientMetrics.mutex.Unlock() - if endTime.IsZero() && !startTime.IsZero() { - inProgressTables = append(inProgressTables, table) - break - } - } - } - - slices.Sort(inProgressTables) - - return inProgressTables -} - -func (s *Metrics) QueuedTables() []string { - var queuedTables []string - - for table, tableMetrics := range s.TableClient { - for _, clientMetrics := range tableMetrics { - clientMetrics.mutex.Lock() - startTime := clientMetrics.startTime - endTime := clientMetrics.endTime - clientMetrics.mutex.Unlock() - if startTime.IsZero() && endTime.IsZero() { - queuedTables = append(queuedTables, table) - break - } - } - } - - slices.Sort(queuedTables) - return queuedTables -} diff --git a/plugins/source/metrics_test.go b/plugins/source/metrics_test.go deleted file mode 100644 index fb7488d47e..0000000000 --- a/plugins/source/metrics_test.go +++ /dev/null @@ -1,186 +0,0 @@ -package source - -import ( - "testing" - "time" - - "github.com/apache/arrow/go/v13/arrow" - "github.com/cloudquery/plugin-sdk/v3/schema" - "github.com/stretchr/testify/assert" -) - -func TestMetrics(t *testing.T) { - s := &Metrics{ - TableClient: make(map[string]map[string]*TableClientMetrics), - } - s.TableClient["test_table"] = make(map[string]*TableClientMetrics) - s.TableClient["test_table"]["testExecutionClient"] = &TableClientMetrics{ - Resources: 1, - Errors: 2, - Panics: 3, - } - if s.TotalResources() != 1 { - t.Fatal("expected 1 resource") - } - if s.TotalErrors() != 2 { - t.Fatal("expected 2 error") - } - if s.TotalPanics() != 3 { - t.Fatal("expected 3 panics") - } - - other := &Metrics{ - TableClient: make(map[string]map[string]*TableClientMetrics), - } - other.TableClient["test_table"] = make(map[string]*TableClientMetrics) - other.TableClient["test_table"]["testExecutionClient"] = &TableClientMetrics{ - Resources: 1, - Errors: 2, - Panics: 3, - } - if !s.Equal(other) { - t.Fatal("expected metrics to be equal") - } -} - -func TestInProgressTables(t *testing.T) { - s := &Metrics{ - TableClient: make(map[string]map[string]*TableClientMetrics), - } - s.TableClient["test_table_done"] = make(map[string]*TableClientMetrics) - s.TableClient["test_table_done"]["testExecutionClient"] = &TableClientMetrics{ - Resources: 1, - Errors: 2, - Panics: 3, - startTime: time.Now(), - endTime: time.Now().Add(time.Second), - } - - s.TableClient["test_table_running1"] = make(map[string]*TableClientMetrics) - s.TableClient["test_table_running1"]["testExecutionClient"] = &TableClientMetrics{ - Resources: 1, - Errors: 2, - Panics: 3, - startTime: time.Now(), - } - - s.TableClient["test_table_running2"] = make(map[string]*TableClientMetrics) - s.TableClient["test_table_running2"]["testExecutionClient"] = &TableClientMetrics{ - Resources: 1, - Errors: 2, - Panics: 3, - startTime: time.Now(), - } - s.TableClient["test_table_running3"] = make(map[string]*TableClientMetrics) - s.TableClient["test_table_running3"]["testExecutionClient"] = &TableClientMetrics{} - assert.ElementsMatch(t, []string{"test_table_running1", "test_table_running2"}, s.InProgressTables()) -} - -func TestQueuedTables(t *testing.T) { - s := &Metrics{ - TableClient: make(map[string]map[string]*TableClientMetrics), - } - s.TableClient["test_table_done"] = make(map[string]*TableClientMetrics) - s.TableClient["test_table_done"]["testExecutionClient"] = &TableClientMetrics{ - Resources: 1, - Errors: 2, - Panics: 3, - startTime: time.Now(), - endTime: time.Now().Add(time.Second), - } - - s.TableClient["test_table_running1"] = make(map[string]*TableClientMetrics) - s.TableClient["test_table_running1"]["testExecutionClient"] = &TableClientMetrics{ - Resources: 1, - Errors: 2, - Panics: 3, - startTime: time.Now(), - } - - s.TableClient["test_table_running2"] = make(map[string]*TableClientMetrics) - s.TableClient["test_table_running2"]["testExecutionClient"] = &TableClientMetrics{ - Resources: 1, - Errors: 2, - Panics: 3, - startTime: time.Now(), - } - s.TableClient["test_table_running3"] = make(map[string]*TableClientMetrics) - s.TableClient["test_table_running3"]["testExecutionClient"] = &TableClientMetrics{} - assert.ElementsMatch(t, []string{"test_table_running3"}, s.QueuedTables()) -} - -type MockClientMeta struct { -} - -func (*MockClientMeta) ID() string { - return "id" -} - -var exampleTableSchema = &schema.Table{ - Name: "toplevel", - Columns: schema.ColumnList{ - { - Name: "col1", - Type: &arrow.Int32Type{}, - }, - }, - Relations: []*schema.Table{ - { - Name: "child", - Columns: schema.ColumnList{ - { - Name: "col1", - Type: &arrow.Int32Type{}, - }, - }, - }, - }, -} - -// When a top-level table is marked as done, all child tables should be marked as done as well. -// For child-tables, only the specified table should be marked as done. -func TestMarkEndChildTableNotRecursive(t *testing.T) { - mockClientMeta := &MockClientMeta{} - - metrics := &Metrics{ - TableClient: make(map[string]map[string]*TableClientMetrics), - } - metrics.TableClient["toplevel"] = nil - metrics.TableClient["child"] = nil - - parentTable := exampleTableSchema - childTable := exampleTableSchema.Relations[0] - - metrics.initWithClients(parentTable, []schema.ClientMeta{mockClientMeta}) - metrics.MarkStart(parentTable, mockClientMeta.ID()) - metrics.MarkStart(childTable, mockClientMeta.ID()) - - assert.ElementsMatch(t, []string{"toplevel", "child"}, metrics.InProgressTables()) - - metrics.MarkEnd(childTable, mockClientMeta.ID()) - - assert.ElementsMatch(t, []string{"toplevel"}, metrics.InProgressTables()) -} - -func TestMarkEndTopLevelTableRecursive(t *testing.T) { - mockClientMeta := &MockClientMeta{} - - metrics := &Metrics{ - TableClient: make(map[string]map[string]*TableClientMetrics), - } - metrics.TableClient["toplevel"] = nil - metrics.TableClient["child"] = nil - - parentTable := exampleTableSchema - childTable := exampleTableSchema.Relations[0] - - metrics.initWithClients(parentTable, []schema.ClientMeta{mockClientMeta}) - metrics.MarkStart(parentTable, mockClientMeta.ID()) - metrics.MarkStart(childTable, mockClientMeta.ID()) - - assert.ElementsMatch(t, []string{"toplevel", "child"}, metrics.InProgressTables()) - - metrics.MarkEnd(parentTable, mockClientMeta.ID()) - - assert.Empty(t, metrics.InProgressTables()) -} diff --git a/plugins/source/options.go b/plugins/source/options.go deleted file mode 100644 index 72ddc5acc7..0000000000 --- a/plugins/source/options.go +++ /dev/null @@ -1,39 +0,0 @@ -package source - -import ( - "context" - - "github.com/cloudquery/plugin-sdk/v3/schema" -) - -type GetTables func(ctx context.Context, c schema.ClientMeta) (schema.Tables, error) - -type Option func(*Plugin) - -// WithDynamicTableOption allows the plugin to return list of tables after call to New -func WithDynamicTableOption(getDynamicTables GetTables) Option { - return func(p *Plugin) { - p.getDynamicTables = getDynamicTables - } -} - -// WithNoInternalColumns won't add internal columns (_cq_id, _cq_parent_cq_id) to the plugin tables -func WithNoInternalColumns() Option { - return func(p *Plugin) { - p.internalColumns = false - } -} - -func WithUnmanaged() Option { - return func(p *Plugin) { - p.unmanaged = true - } -} - -// WithTitleTransformer allows the plugin to control how table names get turned into titles for the -// generated documentation. -func WithTitleTransformer(t func(*schema.Table) string) Option { - return func(p *Plugin) { - p.titleTransformer = t - } -} diff --git a/plugins/source/plugin.go b/plugins/source/plugin.go deleted file mode 100644 index 5a0363af1e..0000000000 --- a/plugins/source/plugin.go +++ /dev/null @@ -1,345 +0,0 @@ -package source - -import ( - "context" - "fmt" - "sync" - "time" - - "github.com/cloudquery/plugin-pb-go/specs" - "github.com/cloudquery/plugin-sdk/v3/backend" - "github.com/cloudquery/plugin-sdk/v3/caser" - "github.com/cloudquery/plugin-sdk/v3/internal/backends/local" - "github.com/cloudquery/plugin-sdk/v3/internal/backends/nop" - "github.com/cloudquery/plugin-sdk/v3/schema" - "github.com/rs/zerolog" - "golang.org/x/sync/semaphore" -) - -type Options struct { - Backend backend.Backend -} - -type NewExecutionClientFunc func(context.Context, zerolog.Logger, specs.Source, Options) (schema.ClientMeta, error) - -type UnmanagedClient interface { - schema.ClientMeta - Sync(ctx context.Context, metrics *Metrics, res chan<- *schema.Resource) error -} - -// Plugin is the base structure required to pass to sdk.serve -// We take a declarative approach to API here similar to Cobra -type Plugin struct { - // Name of plugin i.e aws,gcp, azure etc' - name string - // Version of the plugin - version string - // Called upon configure call to validate and init configuration - newExecutionClient NewExecutionClientFunc - // dynamic table function if specified - getDynamicTables GetTables - // Tables is all tables supported by this source plugin - tables schema.Tables - // status sync metrics - metrics *Metrics - // Logger to call, this logger is passed to the serve.Serve Client, if not defined Serve will create one instead. - logger zerolog.Logger - // resourceSem is a semaphore that limits the number of concurrent resources being fetched - resourceSem *semaphore.Weighted - // tableSem is a semaphore that limits the number of concurrent tables being fetched - tableSems []*semaphore.Weighted - // maxDepth is the max depth of tables - maxDepth uint64 - // caser - caser *caser.Caser - // mu is a mutex that limits the number of concurrent init/syncs (can only be one at a time) - mu sync.Mutex - - // client is the initialized session client - client schema.ClientMeta - // sessionTables are the - sessionTables schema.Tables - // backend is the backend used to store the cursor state - backend backend.Backend - // spec is the spec the client was initialized with - spec specs.Source - // NoInternalColumns if set to true will not add internal columns to tables such as _cq_id and _cq_parent_id - // useful for sources such as PostgreSQL and other databases - internalColumns bool - // unmanaged if set to true then the plugin will call Sync directly and not use the scheduler - unmanaged bool - // titleTransformer allows the plugin to control how table names get turned into titles for generated documentation - titleTransformer func(*schema.Table) string - syncTime time.Time -} - -const ( - maxAllowedDepth = 4 -) - -// Add internal columns -func (p *Plugin) addInternalColumns(tables []*schema.Table) error { - for _, table := range tables { - if c := table.Column("_cq_id"); c != nil { - return fmt.Errorf("table %s already has column _cq_id", table.Name) - } - cqID := schema.CqIDColumn - if len(table.PrimaryKeys()) == 0 { - cqID.PrimaryKey = true - } - cqSourceName := schema.CqSourceNameColumn - cqSyncTime := schema.CqSyncTimeColumn - cqSourceName.Resolver = func(_ context.Context, _ schema.ClientMeta, resource *schema.Resource, c schema.Column) error { - return resource.Set(c.Name, p.spec.Name) - } - cqSyncTime.Resolver = func(_ context.Context, _ schema.ClientMeta, resource *schema.Resource, c schema.Column) error { - return resource.Set(c.Name, p.syncTime) - } - - table.Columns = append([]schema.Column{cqSourceName, cqSyncTime, cqID, schema.CqParentIDColumn}, table.Columns...) - if err := p.addInternalColumns(table.Relations); err != nil { - return err - } - } - return nil -} - -// Set parent links on relational tables -func setParents(tables schema.Tables, parent *schema.Table) { - for _, table := range tables { - table.Parent = parent - setParents(table.Relations, table) - } -} - -// Apply transformations to tables -func transformTables(tables schema.Tables) error { - for _, table := range tables { - if table.Transform != nil { - if err := table.Transform(table); err != nil { - return fmt.Errorf("failed to transform table %s: %w", table.Name, err) - } - } - if err := transformTables(table.Relations); err != nil { - return err - } - } - return nil -} - -func maxDepth(tables schema.Tables) uint64 { - var depth uint64 - if len(tables) == 0 { - return 0 - } - for _, table := range tables { - newDepth := 1 + maxDepth(table.Relations) - if newDepth > depth { - depth = newDepth - } - } - return depth -} - -// NewPlugin returns a new plugin with a given name, version, tables, newExecutionClient -// and additional options. -func NewPlugin(name string, version string, tables []*schema.Table, newExecutionClient NewExecutionClientFunc, options ...Option) *Plugin { - p := Plugin{ - name: name, - version: version, - tables: tables, - newExecutionClient: newExecutionClient, - metrics: &Metrics{TableClient: make(map[string]map[string]*TableClientMetrics)}, - caser: caser.New(), - titleTransformer: DefaultTitleTransformer, - internalColumns: true, - } - for _, opt := range options { - opt(&p) - } - setParents(p.tables, nil) - if err := transformTables(p.tables); err != nil { - panic(err) - } - if p.internalColumns { - if err := p.addInternalColumns(p.tables); err != nil { - panic(err) - } - } - if err := p.validate(); err != nil { - panic(err) - } - p.maxDepth = maxDepth(p.tables) - if p.maxDepth > maxAllowedDepth { - panic(fmt.Errorf("max depth of tables is %d, max allowed is %d", p.maxDepth, maxAllowedDepth)) - } - return &p -} - -func (p *Plugin) SetLogger(logger zerolog.Logger) { - p.logger = logger.With().Str("module", p.name+"-src").Logger() -} - -// Tables returns all tables supported by this source plugin -func (p *Plugin) Tables() schema.Tables { - return p.tables -} - -func (p *Plugin) HasDynamicTables() bool { - return p.getDynamicTables != nil -} - -func (p *Plugin) GetDynamicTables() schema.Tables { - return p.sessionTables -} - -// TablesForSpec returns all tables supported by this source plugin that match the given spec. -// It validates the tables part of the spec and will return an error if it is found to be invalid. -// This is deprecated method -func (p *Plugin) TablesForSpec(spec specs.Source) (schema.Tables, error) { - spec.SetDefaults() - if err := spec.Validate(); err != nil { - return nil, fmt.Errorf("invalid spec: %w", err) - } - tables, err := p.tables.FilterDfs(spec.Tables, spec.SkipTables, spec.SkipDependentTables) - if err != nil { - return nil, fmt.Errorf("failed to filter tables: %w", err) - } - return tables, nil -} - -// Name return the name of this plugin -func (p *Plugin) Name() string { - return p.name -} - -// Version returns the version of this plugin -func (p *Plugin) Version() string { - return p.version -} - -func (p *Plugin) Metrics() *Metrics { - return p.metrics -} - -func (p *Plugin) Init(ctx context.Context, spec specs.Source) error { - if !p.mu.TryLock() { - return fmt.Errorf("plugin already in use") - } - defer p.mu.Unlock() - - var err error - spec.SetDefaults() - if err := spec.Validate(); err != nil { - return fmt.Errorf("invalid spec: %w", err) - } - p.spec = spec - - switch spec.Backend { - case specs.BackendNone: - p.backend = nop.New() - case specs.BackendLocal: - p.backend, err = local.New(spec) - if err != nil { - return fmt.Errorf("failed to initialize local backend: %w", err) - } - default: - return fmt.Errorf("unknown backend: %s", spec.Backend) - } - - tables := p.tables - if p.getDynamicTables != nil { - p.client, err = p.newExecutionClient(ctx, p.logger, spec, Options{Backend: p.backend}) - if err != nil { - return fmt.Errorf("failed to create execution client for source plugin %s: %w", p.name, err) - } - tables, err = p.getDynamicTables(ctx, p.client) - if err != nil { - return fmt.Errorf("failed to get dynamic tables: %w", err) - } - - tables, err = tables.FilterDfs(spec.Tables, spec.SkipTables, spec.SkipDependentTables) - if err != nil { - return fmt.Errorf("failed to filter tables: %w", err) - } - if len(tables) == 0 { - return fmt.Errorf("no tables to sync - please check your spec 'tables' and 'skip_tables' settings") - } - - setParents(tables, nil) - if err := transformTables(tables); err != nil { - return err - } - if p.internalColumns { - if err := p.addInternalColumns(tables); err != nil { - return err - } - } - if err := p.validate(); err != nil { - return err - } - p.maxDepth = maxDepth(tables) - if p.maxDepth > maxAllowedDepth { - return fmt.Errorf("max depth of tables is %d, max allowed is %d", p.maxDepth, maxAllowedDepth) - } - } else { - tables, err = tables.FilterDfs(spec.Tables, spec.SkipTables, spec.SkipDependentTables) - if err != nil { - return fmt.Errorf("failed to filter tables: %w", err) - } - } - - p.sessionTables = tables - return nil -} - -// Sync is syncing data from the requested tables in spec to the given channel -func (p *Plugin) Sync(ctx context.Context, syncTime time.Time, res chan<- *schema.Resource) error { - if !p.mu.TryLock() { - return fmt.Errorf("plugin already in use") - } - defer p.mu.Unlock() - p.syncTime = syncTime - if p.client == nil { - var err error - p.client, err = p.newExecutionClient(ctx, p.logger, p.spec, Options{Backend: p.backend}) - if err != nil { - return fmt.Errorf("failed to create execution client for source plugin %s: %w", p.name, err) - } - } - - startTime := time.Now() - if p.unmanaged { - unmanagedClient := p.client.(UnmanagedClient) - if err := unmanagedClient.Sync(ctx, p.metrics, res); err != nil { - return fmt.Errorf("failed to sync unmanaged client: %w", err) - } - } else { - switch p.spec.Scheduler { - case specs.SchedulerDFS: - p.syncDfs(ctx, p.spec, p.client, p.sessionTables, res) - case specs.SchedulerRoundRobin: - p.syncRoundRobin(ctx, p.spec, p.client, p.sessionTables, res) - default: - return fmt.Errorf("unknown scheduler %s. Options are: %v", p.spec.Scheduler, specs.AllSchedulers.String()) - } - } - - p.logger.Info().Uint64("resources", p.metrics.TotalResources()).Uint64("errors", p.metrics.TotalErrors()).Uint64("panics", p.metrics.TotalPanics()).TimeDiff("duration", time.Now(), startTime).Msg("sync finished") - return nil -} - -func (p *Plugin) Close(ctx context.Context) error { - if !p.mu.TryLock() { - return fmt.Errorf("plugin already in use") - } - defer p.mu.Unlock() - if p.backend != nil { - err := p.backend.Close(ctx) - if err != nil { - return fmt.Errorf("failed to close backend: %w", err) - } - p.backend = nil - } - return nil -} diff --git a/plugins/source/scheduler.go b/plugins/source/scheduler.go deleted file mode 100644 index 1967f3cc1a..0000000000 --- a/plugins/source/scheduler.go +++ /dev/null @@ -1,177 +0,0 @@ -package source - -import ( - "context" - "errors" - "fmt" - "runtime/debug" - "sync" - "sync/atomic" - "time" - - "github.com/cloudquery/plugin-sdk/v3/schema" - "github.com/getsentry/sentry-go" - "github.com/rs/zerolog" - "github.com/thoas/go-funk" -) - -const ( - minTableConcurrency = 1 - minResourceConcurrency = 100 -) - -const periodicMetricLoggerInterval = 30 * time.Second -const periodicMetricLoggerLogTablesLimit = 30 // The max number of in_progress_tables to log in the periodic metric logger - -func (p *Plugin) logTablesMetrics(tables schema.Tables, client schema.ClientMeta) { - clientName := client.ID() - for _, table := range tables { - metrics := p.metrics.TableClient[table.Name][clientName] - p.logger.Info().Str("table", table.Name).Str("client", clientName).Uint64("resources", metrics.Resources).Uint64("errors", metrics.Errors).Msg("table sync finished") - p.logTablesMetrics(table.Relations, client) - } -} - -func (p *Plugin) resolveResource(ctx context.Context, table *schema.Table, client schema.ClientMeta, parent *schema.Resource, item any) *schema.Resource { - var validationErr *schema.ValidationError - ctx, cancel := context.WithTimeout(ctx, 10*time.Minute) - defer cancel() - resource := schema.NewResourceData(table, parent, item) - objectStartTime := time.Now() - clientID := client.ID() - tableMetrics := p.metrics.TableClient[table.Name][clientID] - logger := p.logger.With().Str("table", table.Name).Str("client", clientID).Logger() - defer func() { - if err := recover(); err != nil { - stack := fmt.Sprintf("%s\n%s", err, string(debug.Stack())) - logger.Error().Interface("error", err).TimeDiff("duration", time.Now(), objectStartTime).Str("stack", stack).Msg("resource resolver finished with panic") - atomic.AddUint64(&tableMetrics.Panics, 1) - sentry.WithScope(func(scope *sentry.Scope) { - scope.SetTag("table", table.Name) - sentry.CurrentHub().CaptureMessage(stack) - }) - } - }() - if table.PreResourceResolver != nil { - if err := table.PreResourceResolver(ctx, client, resource); err != nil { - logger.Error().Err(err).Msg("pre resource resolver failed") - atomic.AddUint64(&tableMetrics.Errors, 1) - if errors.As(err, &validationErr) { - sentry.WithScope(func(scope *sentry.Scope) { - scope.SetTag("table", table.Name) - sentry.CurrentHub().CaptureMessage(validationErr.MaskedError()) - }) - } - return nil - } - } - - for _, c := range table.Columns { - p.resolveColumn(ctx, logger, tableMetrics, client, resource, c) - } - - if table.PostResourceResolver != nil { - if err := table.PostResourceResolver(ctx, client, resource); err != nil { - logger.Error().Stack().Err(err).Msg("post resource resolver finished with error") - atomic.AddUint64(&tableMetrics.Errors, 1) - if errors.As(err, &validationErr) { - sentry.WithScope(func(scope *sentry.Scope) { - scope.SetTag("table", table.Name) - sentry.CurrentHub().CaptureMessage(validationErr.MaskedError()) - }) - } - } - } - atomic.AddUint64(&tableMetrics.Resources, 1) - return resource -} - -func (p *Plugin) resolveColumn(ctx context.Context, logger zerolog.Logger, tableMetrics *TableClientMetrics, client schema.ClientMeta, resource *schema.Resource, c schema.Column) { - var validationErr *schema.ValidationError - columnStartTime := time.Now() - defer func() { - if err := recover(); err != nil { - stack := fmt.Sprintf("%s\n%s", err, string(debug.Stack())) - logger.Error().Str("column", c.Name).Interface("error", err).TimeDiff("duration", time.Now(), columnStartTime).Str("stack", stack).Msg("column resolver finished with panic") - atomic.AddUint64(&tableMetrics.Panics, 1) - sentry.WithScope(func(scope *sentry.Scope) { - scope.SetTag("table", resource.Table.Name) - scope.SetTag("column", c.Name) - sentry.CurrentHub().CaptureMessage(stack) - }) - } - }() - - if c.Resolver != nil { - if err := c.Resolver(ctx, client, resource, c); err != nil { - logger.Error().Err(err).Msg("column resolver finished with error") - atomic.AddUint64(&tableMetrics.Errors, 1) - if errors.As(err, &validationErr) { - sentry.WithScope(func(scope *sentry.Scope) { - scope.SetTag("table", resource.Table.Name) - scope.SetTag("column", c.Name) - sentry.CurrentHub().CaptureMessage(validationErr.MaskedError()) - }) - } - } - } else { - // base use case: try to get column with CamelCase name - v := funk.Get(resource.GetItem(), p.caser.ToPascal(c.Name), funk.WithAllowZero()) - if v != nil { - err := resource.Set(c.Name, v) - if err != nil { - logger.Error().Err(err).Msg("column resolver finished with error") - atomic.AddUint64(&tableMetrics.Errors, 1) - if errors.As(err, &validationErr) { - sentry.WithScope(func(scope *sentry.Scope) { - scope.SetTag("table", resource.Table.Name) - scope.SetTag("column", c.Name) - sentry.CurrentHub().CaptureMessage(validationErr.MaskedError()) - }) - } - } - } - } -} - -func (p *Plugin) periodicMetricLogger(ctx context.Context, wg *sync.WaitGroup) { - defer wg.Done() - - ticker := time.NewTicker(periodicMetricLoggerInterval) - defer ticker.Stop() - - for { - select { - case <-ctx.Done(): - return - case <-ticker.C: - inProgressTables := p.metrics.InProgressTables() - queuedTables := p.metrics.QueuedTables() - logLine := p.logger.Info(). - Uint64("total_resources", p.metrics.TotalResourcesAtomic()). - Uint64("total_errors", p.metrics.TotalErrorsAtomic()). - Uint64("total_panics", p.metrics.TotalPanicsAtomic()). - Int("num_in_progress_tables", len(inProgressTables)). - Int("num_queued_tables", len(queuedTables)) - - if len(inProgressTables) <= periodicMetricLoggerLogTablesLimit { - logLine.Strs("in_progress_tables", inProgressTables) - } - - if len(queuedTables) <= periodicMetricLoggerLogTablesLimit { - logLine.Strs("queued_tables", queuedTables) - } - - logLine.Msg("Sync in progress") - } - } -} - -// unparam's suggestion to remove the second parameter is not good advice here. -// nolint:unparam -func max(a, b uint64) uint64 { - if a > b { - return a - } - return b -} diff --git a/plugins/source/scheduler_dfs.go b/plugins/source/scheduler_dfs.go deleted file mode 100644 index 1cd5142624..0000000000 --- a/plugins/source/scheduler_dfs.go +++ /dev/null @@ -1,234 +0,0 @@ -package source - -import ( - "context" - "errors" - "fmt" - "runtime/debug" - "sync" - "sync/atomic" - - "github.com/cloudquery/plugin-pb-go/specs" - "github.com/cloudquery/plugin-sdk/v3/helpers" - "github.com/cloudquery/plugin-sdk/v3/schema" - "github.com/getsentry/sentry-go" - "golang.org/x/sync/semaphore" -) - -func (p *Plugin) syncDfs(ctx context.Context, spec specs.Source, client schema.ClientMeta, tables schema.Tables, resolvedResources chan<- *schema.Resource) { - // This is very similar to the concurrent web crawler problem with some minor changes. - // We are using DFS to make sure memory usage is capped at O(h) where h is the height of the tree. - tableConcurrency := max(spec.Concurrency/minResourceConcurrency, minTableConcurrency) - resourceConcurrency := tableConcurrency * minResourceConcurrency - - p.tableSems = make([]*semaphore.Weighted, p.maxDepth) - for i := uint64(0); i < p.maxDepth; i++ { - p.tableSems[i] = semaphore.NewWeighted(int64(tableConcurrency)) - // reduce table concurrency logarithmically for every depth level - tableConcurrency = max(tableConcurrency/2, minTableConcurrency) - } - p.resourceSem = semaphore.NewWeighted(int64(resourceConcurrency)) - - // we have this because plugins can return sometimes clients in a random way which will cause - // differences between this run and the next one. - preInitialisedClients := make([][]schema.ClientMeta, len(tables)) - for i, table := range tables { - clients := []schema.ClientMeta{client} - if table.Multiplex != nil { - clients = table.Multiplex(client) - } - // Detect duplicate clients while multiplexing - seenClients := make(map[string]bool) - for _, c := range clients { - if _, ok := seenClients[c.ID()]; !ok { - seenClients[c.ID()] = true - } else { - sentry.WithScope(func(scope *sentry.Scope) { - scope.SetTag("table", table.Name) - sentry.CurrentHub().CaptureMessage("duplicate client ID in " + table.Name) - }) - p.logger.Warn().Str("client", c.ID()).Str("table", table.Name).Msg("multiplex returned duplicate client") - } - } - preInitialisedClients[i] = clients - // we do this here to avoid locks so we initial the metrics structure once in the main goroutines - // and then we can just read from it in the other goroutines concurrently given we are not writing to it. - p.metrics.initWithClients(table, clients) - } - - // We start a goroutine that logs the metrics periodically. - // It needs its own waitgroup - var logWg sync.WaitGroup - logWg.Add(1) - - logCtx, logCancel := context.WithCancel(ctx) - go p.periodicMetricLogger(logCtx, &logWg) - - var wg sync.WaitGroup - for i, table := range tables { - table := table - clients := preInitialisedClients[i] - for _, client := range clients { - client := client - if err := p.tableSems[0].Acquire(ctx, 1); err != nil { - // This means context was cancelled - wg.Wait() - // gracefully shut down the logger goroutine - logCancel() - logWg.Wait() - return - } - wg.Add(1) - go func() { - defer wg.Done() - defer p.tableSems[0].Release(1) - // not checking for error here as nothing much todo. - // the error is logged and this happens when context is cancelled - p.resolveTableDfs(ctx, table, client, nil, resolvedResources, 1) - }() - } - } - - // Wait for all the worker goroutines to finish - wg.Wait() - - // gracefully shut down the logger goroutine - logCancel() - logWg.Wait() -} - -func (p *Plugin) resolveTableDfs(ctx context.Context, table *schema.Table, client schema.ClientMeta, parent *schema.Resource, resolvedResources chan<- *schema.Resource, depth int) { - clientName := client.ID() - - p.metrics.MarkStart(table, clientName) - defer p.Metrics().MarkEnd(table, clientName) - - var validationErr *schema.ValidationError - logger := p.logger.With().Str("table", table.Name).Str("client", clientName).Logger() - - if parent == nil { // Log only for root tables, otherwise we spam too much. - logger.Info().Msg("top level table resolver started") - } - tableMetrics := p.metrics.TableClient[table.Name][clientName] - - res := make(chan any) - go func() { - defer func() { - if err := recover(); err != nil { - stack := fmt.Sprintf("%s\n%s", err, string(debug.Stack())) - sentry.WithScope(func(scope *sentry.Scope) { - scope.SetTag("table", table.Name) - sentry.CurrentHub().CaptureMessage(stack) - }) - logger.Error().Interface("error", err).Str("stack", stack).Msg("table resolver finished with panic") - atomic.AddUint64(&tableMetrics.Panics, 1) - } - close(res) - }() - if err := table.Resolver(ctx, client, parent, res); err != nil { - logger.Error().Err(err).Msg("table resolver finished with error") - atomic.AddUint64(&tableMetrics.Errors, 1) - if errors.As(err, &validationErr) { - sentry.WithScope(func(scope *sentry.Scope) { - scope.SetTag("table", table.Name) - sentry.CurrentHub().CaptureMessage(validationErr.MaskedError()) - }) - } - return - } - }() - - for r := range res { - p.resolveResourcesDfs(ctx, table, client, parent, r, resolvedResources, depth) - } - - // we don't need any waitgroups here because we are waiting for the channel to close - if parent == nil { // Log only for root tables and relations only after resolving is done, otherwise we spam per object instead of per table. - logger.Info().Uint64("resources", tableMetrics.Resources).Uint64("errors", tableMetrics.Errors).Msg("table sync finished") - p.logTablesMetrics(table.Relations, client) - } -} - -func (p *Plugin) resolveResourcesDfs(ctx context.Context, table *schema.Table, client schema.ClientMeta, parent *schema.Resource, resources any, resolvedResources chan<- *schema.Resource, depth int) { - resourcesSlice := helpers.InterfaceSlice(resources) - if len(resourcesSlice) == 0 { - return - } - resourcesChan := make(chan *schema.Resource, len(resourcesSlice)) - go func() { - defer close(resourcesChan) - var wg sync.WaitGroup - sentValidationErrors := sync.Map{} - for i := range resourcesSlice { - i := i - if err := p.resourceSem.Acquire(ctx, 1); err != nil { - p.logger.Warn().Err(err).Msg("failed to acquire semaphore. context cancelled") - wg.Wait() - // we have to continue emptying the channel to exit gracefully - return - } - wg.Add(1) - go func() { - defer p.resourceSem.Release(1) - defer wg.Done() - //nolint:all - resolvedResource := p.resolveResource(ctx, table, client, parent, resourcesSlice[i]) - if resolvedResource == nil { - return - } - - if err := resolvedResource.CalculateCQID(p.spec.DeterministicCQID); err != nil { - tableMetrics := p.metrics.TableClient[table.Name][client.ID()] - p.logger.Error().Err(err).Str("table", table.Name).Str("client", client.ID()).Msg("resource resolver finished with primary key calculation error") - if _, found := sentValidationErrors.LoadOrStore(table.Name, struct{}{}); !found { - // send resource validation errors to Sentry only once per table, - // to avoid sending too many duplicate messages - sentry.WithScope(func(scope *sentry.Scope) { - scope.SetTag("table", table.Name) - sentry.CurrentHub().CaptureMessage(err.Error()) - }) - } - atomic.AddUint64(&tableMetrics.Errors, 1) - return - } - if err := resolvedResource.Validate(); err != nil { - tableMetrics := p.metrics.TableClient[table.Name][client.ID()] - p.logger.Error().Err(err).Str("table", table.Name).Str("client", client.ID()).Msg("resource resolver finished with validation error") - if _, found := sentValidationErrors.LoadOrStore(table.Name, struct{}{}); !found { - // send resource validation errors to Sentry only once per table, - // to avoid sending too many duplicate messages - sentry.WithScope(func(scope *sentry.Scope) { - scope.SetTag("table", table.Name) - sentry.CurrentHub().CaptureMessage(err.Error()) - }) - } - atomic.AddUint64(&tableMetrics.Errors, 1) - return - } - resourcesChan <- resolvedResource - }() - } - wg.Wait() - }() - - var wg sync.WaitGroup - for resource := range resourcesChan { - resource := resource - resolvedResources <- resource - for _, relation := range resource.Table.Relations { - relation := relation - if err := p.tableSems[depth].Acquire(ctx, 1); err != nil { - // This means context was cancelled - wg.Wait() - return - } - wg.Add(1) - go func() { - defer wg.Done() - defer p.tableSems[depth].Release(1) - p.resolveTableDfs(ctx, relation, client, resource, resolvedResources, depth+1) - }() - } - } - wg.Wait() -} diff --git a/plugins/source/scheduler_round_robin.go b/plugins/source/scheduler_round_robin.go deleted file mode 100644 index 00b1030f68..0000000000 --- a/plugins/source/scheduler_round_robin.go +++ /dev/null @@ -1,104 +0,0 @@ -package source - -import ( - "context" - "sync" - - "github.com/cloudquery/plugin-pb-go/specs" - "github.com/cloudquery/plugin-sdk/v3/schema" - "golang.org/x/sync/semaphore" -) - -type tableClient struct { - table *schema.Table - client schema.ClientMeta -} - -func (p *Plugin) syncRoundRobin(ctx context.Context, spec specs.Source, client schema.ClientMeta, tables schema.Tables, resolvedResources chan<- *schema.Resource) { - tableConcurrency := max(spec.Concurrency/minResourceConcurrency, minTableConcurrency) - resourceConcurrency := tableConcurrency * minResourceConcurrency - - p.tableSems = make([]*semaphore.Weighted, p.maxDepth) - for i := uint64(0); i < p.maxDepth; i++ { - p.tableSems[i] = semaphore.NewWeighted(int64(tableConcurrency)) - // reduce table concurrency logarithmically for every depth level - tableConcurrency = max(tableConcurrency/2, minTableConcurrency) - } - p.resourceSem = semaphore.NewWeighted(int64(resourceConcurrency)) - - // we have this because plugins can return sometimes clients in a random way which will cause - // differences between this run and the next one. - preInitialisedClients := make([][]schema.ClientMeta, len(tables)) - for i, table := range tables { - clients := []schema.ClientMeta{client} - if table.Multiplex != nil { - clients = table.Multiplex(client) - } - preInitialisedClients[i] = clients - // we do this here to avoid locks so we initial the metrics structure once in the main goroutines - // and then we can just read from it in the other goroutines concurrently given we are not writing to it. - p.metrics.initWithClients(table, clients) - } - - // We start a goroutine that logs the metrics periodically. - // It needs its own waitgroup - var logWg sync.WaitGroup - logWg.Add(1) - - logCtx, logCancel := context.WithCancel(ctx) - go p.periodicMetricLogger(logCtx, &logWg) - - tableClients := roundRobinInterleave(tables, preInitialisedClients) - - var wg sync.WaitGroup - for _, tc := range tableClients { - table := tc.table - cl := tc.client - if err := p.tableSems[0].Acquire(ctx, 1); err != nil { - // This means context was cancelled - wg.Wait() - // gracefully shut down the logger goroutine - logCancel() - logWg.Wait() - return - } - wg.Add(1) - go func() { - defer wg.Done() - defer p.tableSems[0].Release(1) - // not checking for error here as nothing much to do. - // the error is logged and this happens when context is cancelled - // Round Robin currently uses the DFS algorithm to resolve the tables, but this - // may change in the future. - p.resolveTableDfs(ctx, table, cl, nil, resolvedResources, 1) - }() - } - - // Wait for all the worker goroutines to finish - wg.Wait() - - // gracefully shut down the logger goroutine - logCancel() - logWg.Wait() -} - -// interleave table-clients so that we get: -// table1-client1, table2-client1, table3-client1, table1-client2, table2-client2, table3-client2, ... -func roundRobinInterleave(tables schema.Tables, preInitialisedClients [][]schema.ClientMeta) []tableClient { - tableClients := make([]tableClient, 0) - c := 0 - for { - addedNew := false - for i, table := range tables { - if c < len(preInitialisedClients[i]) { - tableClients = append(tableClients, tableClient{table: table, client: preInitialisedClients[i][c]}) - addedNew = true - } - } - c++ - if !addedNew { - break - } - } - return tableClients -} diff --git a/plugins/source/scheduler_round_robin_test.go b/plugins/source/scheduler_round_robin_test.go deleted file mode 100644 index 8f7e3425f5..0000000000 --- a/plugins/source/scheduler_round_robin_test.go +++ /dev/null @@ -1,65 +0,0 @@ -package source - -import ( - "testing" - - "github.com/cloudquery/plugin-sdk/v3/schema" -) - -func TestRoundRobinInterleave(t *testing.T) { - table1 := &schema.Table{Name: "test_table"} - table2 := &schema.Table{Name: "test_table2"} - client1 := &testExecutionClient{} - client2 := &testExecutionClient{} - client3 := &testExecutionClient{} - cases := []struct { - name string - tables schema.Tables - preInitialisedClients [][]schema.ClientMeta - want []tableClient - }{ - { - name: "single table", - tables: schema.Tables{table1}, - preInitialisedClients: [][]schema.ClientMeta{{client1}}, - want: []tableClient{{table: table1, client: client1}}, - }, - { - name: "two tables with different clients", - tables: schema.Tables{table1, table2}, - preInitialisedClients: [][]schema.ClientMeta{{client1}, {client1, client2}}, - want: []tableClient{ - {table: table1, client: client1}, - {table: table2, client: client1}, - {table: table2, client: client2}, - }, - }, - { - name: "two tables with different clients", - tables: schema.Tables{table1, table2}, - preInitialisedClients: [][]schema.ClientMeta{{client1, client3}, {client1, client2}}, - want: []tableClient{ - {table: table1, client: client1}, - {table: table2, client: client1}, - {table: table1, client: client3}, - {table: table2, client: client2}, - }, - }, - } - for _, tc := range cases { - t.Run(tc.name, func(t *testing.T) { - got := roundRobinInterleave(tc.tables, tc.preInitialisedClients) - if len(got) != len(tc.want) { - t.Fatalf("got %d tableClients, want %d", len(got), len(tc.want)) - } - for i := range got { - if got[i].table != tc.want[i].table { - t.Errorf("got table %v, want %v", got[i].table, tc.want[i].table) - } - if got[i].client != tc.want[i].client { - t.Errorf("got client %v, want %v", got[i].client, tc.want[i].client) - } - } - }) - } -} diff --git a/plugins/source/templates/all_tables.md.go.tpl b/plugins/source/templates/all_tables.md.go.tpl deleted file mode 100644 index 008afb66fd..0000000000 --- a/plugins/source/templates/all_tables.md.go.tpl +++ /dev/null @@ -1,5 +0,0 @@ -# Source Plugin: {{.PluginName}} -## Tables -{{- range $table := $.Tables }} -{{- template "all_tables_entry.md.go.tpl" $table}} -{{- end }} \ No newline at end of file diff --git a/plugins/source/templates/all_tables_entry.md.go.tpl b/plugins/source/templates/all_tables_entry.md.go.tpl deleted file mode 100644 index 6166b1983b..0000000000 --- a/plugins/source/templates/all_tables_entry.md.go.tpl +++ /dev/null @@ -1,5 +0,0 @@ - -{{. | indentToDepth}}- [{{.Name}}]({{.Name}}.md){{ if .IsIncremental}} (Incremental){{ end }} -{{- range $index, $rel := .Relations}} -{{- template "all_tables_entry.md.go.tpl" $rel}} -{{- end}} \ No newline at end of file diff --git a/plugins/source/templates/table.md.go.tpl b/plugins/source/templates/table.md.go.tpl deleted file mode 100644 index 202d343e39..0000000000 --- a/plugins/source/templates/table.md.go.tpl +++ /dev/null @@ -1,44 +0,0 @@ -# Table: {{$.Name}} - -This table shows data for {{.|title}}. - -{{ $.Description }} -{{ $length := len $.PrimaryKeys -}} -{{ if eq $length 1 }} -The primary key for this table is **{{ index $.PrimaryKeys 0 }}**. -{{ else }} -The composite primary key for this table is ({{ range $index, $pk := $.PrimaryKeys -}} - {{if $index }}, {{end -}} - **{{$pk}}** - {{- end -}}). -{{ end }} -{{- if $.IsIncremental -}} -It supports incremental syncs -{{- $ikLength := len $.IncrementalKeys -}} -{{- if eq $ikLength 1 }} based on the **{{ index $.IncrementalKeys 0 }}** column -{{- else if gt $ikLength 1 }} based on the ({{ range $index, $pk := $.IncrementalKeys -}} - {{- if $index -}}, {{end -}} - **{{$pk}}** - {{- end -}}) columns -{{- end -}}. -{{- end -}} - -{{- if or ($.Relations) ($.Parent) }} -## Relations -{{- end }} -{{- if $.Parent }} -This table depends on [{{ $.Parent.Name }}]({{ $.Parent.Name }}.md). -{{- end}} -{{ if $.Relations }} -The following tables depend on {{.Name}}: -{{- range $rel := $.Relations }} - - [{{ $rel.Name }}]({{ $rel.Name }}.md) -{{- end }} -{{- end }} - -## Columns -| Name | Type | -| ------------- | ------------- | -{{- range $column := $.Columns }} -|{{$column.Name}}{{if $column.PrimaryKey}} (PK){{end}}{{if $column.IncrementalKey}} (Incremental Key){{end}}|`{{$column.Type}}`| -{{- end }} \ No newline at end of file diff --git a/plugins/source/testdata/TestGeneratePluginDocs-JSON-__tables.json b/plugins/source/testdata/TestGeneratePluginDocs-JSON-__tables.json deleted file mode 100644 index 7a8280833e..0000000000 --- a/plugins/source/testdata/TestGeneratePluginDocs-JSON-__tables.json +++ /dev/null @@ -1,214 +0,0 @@ -[ - { - "name": "incremental_table", - "title": "Incremental Table", - "description": "Description for incremental table", - "columns": [ - { - "name": "_cq_source_name", - "type": "utf8" - }, - { - "name": "_cq_sync_time", - "type": "timestamp[us, tz=UTC]" - }, - { - "name": "_cq_id", - "type": "uuid" - }, - { - "name": "_cq_parent_id", - "type": "uuid" - }, - { - "name": "int_col", - "type": "int64" - }, - { - "name": "id_col", - "type": "int64", - "is_primary_key": true, - "is_incremental_key": true - }, - { - "name": "id_col2", - "type": "int64", - "is_incremental_key": true - } - ], - "relations": [] - }, - { - "name": "test_table", - "title": "Test Table", - "description": "Description for test table", - "columns": [ - { - "name": "_cq_source_name", - "type": "utf8" - }, - { - "name": "_cq_sync_time", - "type": "timestamp[us, tz=UTC]" - }, - { - "name": "_cq_id", - "type": "uuid" - }, - { - "name": "_cq_parent_id", - "type": "uuid" - }, - { - "name": "int_col", - "type": "int64" - }, - { - "name": "id_col", - "type": "int64", - "is_primary_key": true - }, - { - "name": "id_col2", - "type": "int64", - "is_primary_key": true - }, - { - "name": "json_col", - "type": "json" - }, - { - "name": "list_col", - "type": "list" - }, - { - "name": "map_col", - "type": "map" - }, - { - "name": "struct_col", - "type": "struct" - } - ], - "relations": [ - { - "name": "relation_table", - "title": "Relation Table", - "description": "Description for relational table", - "columns": [ - { - "name": "_cq_source_name", - "type": "utf8" - }, - { - "name": "_cq_sync_time", - "type": "timestamp[us, tz=UTC]" - }, - { - "name": "_cq_id", - "type": "uuid", - "is_primary_key": true - }, - { - "name": "_cq_parent_id", - "type": "uuid" - }, - { - "name": "string_col", - "type": "utf8" - } - ], - "relations": [ - { - "name": "relation_relation_table_a", - "title": "Relation Relation Table A", - "description": "Description for relational table's relation", - "columns": [ - { - "name": "_cq_source_name", - "type": "utf8" - }, - { - "name": "_cq_sync_time", - "type": "timestamp[us, tz=UTC]" - }, - { - "name": "_cq_id", - "type": "uuid", - "is_primary_key": true - }, - { - "name": "_cq_parent_id", - "type": "uuid" - }, - { - "name": "string_col", - "type": "utf8" - } - ], - "relations": [] - }, - { - "name": "relation_relation_table_b", - "title": "Relation Relation Table B", - "description": "Description for relational table's relation", - "columns": [ - { - "name": "_cq_source_name", - "type": "utf8" - }, - { - "name": "_cq_sync_time", - "type": "timestamp[us, tz=UTC]" - }, - { - "name": "_cq_id", - "type": "uuid", - "is_primary_key": true - }, - { - "name": "_cq_parent_id", - "type": "uuid" - }, - { - "name": "string_col", - "type": "utf8" - } - ], - "relations": [] - } - ] - }, - { - "name": "relation_table2", - "title": "Relation Table2", - "description": "Description for second relational table", - "columns": [ - { - "name": "_cq_source_name", - "type": "utf8" - }, - { - "name": "_cq_sync_time", - "type": "timestamp[us, tz=UTC]" - }, - { - "name": "_cq_id", - "type": "uuid", - "is_primary_key": true - }, - { - "name": "_cq_parent_id", - "type": "uuid" - }, - { - "name": "string_col", - "type": "utf8" - } - ], - "relations": [] - } - ] - } -] - diff --git a/plugins/source/testdata/TestGeneratePluginDocs-Markdown-README.md b/plugins/source/testdata/TestGeneratePluginDocs-Markdown-README.md deleted file mode 100644 index 9480a0598a..0000000000 --- a/plugins/source/testdata/TestGeneratePluginDocs-Markdown-README.md +++ /dev/null @@ -1,10 +0,0 @@ -# Source Plugin: test - -## Tables - -- [incremental_table](incremental_table.md) (Incremental) -- [test_table](test_table.md) - - [relation_table](relation_table.md) - - [relation_relation_table_a](relation_relation_table_a.md) - - [relation_relation_table_b](relation_relation_table_b.md) - - [relation_table2](relation_table2.md) diff --git a/plugins/source/testdata/TestGeneratePluginDocs-Markdown-incremental_table.md b/plugins/source/testdata/TestGeneratePluginDocs-Markdown-incremental_table.md deleted file mode 100644 index d0b1530577..0000000000 --- a/plugins/source/testdata/TestGeneratePluginDocs-Markdown-incremental_table.md +++ /dev/null @@ -1,20 +0,0 @@ -# Table: incremental_table - -This table shows data for Incremental Table. - -Description for incremental table - -The primary key for this table is **id_col**. -It supports incremental syncs based on the (**id_col**, **id_col2**) columns. - -## Columns - -| Name | Type | -| ------------- | ------------- | -|_cq_source_name|`utf8`| -|_cq_sync_time|`timestamp[us, tz=UTC]`| -|_cq_id|`uuid`| -|_cq_parent_id|`uuid`| -|int_col|`int64`| -|id_col (PK) (Incremental Key)|`int64`| -|id_col2 (Incremental Key)|`int64`| diff --git a/plugins/source/testdata/TestGeneratePluginDocs-Markdown-relation_relation_table_a.md b/plugins/source/testdata/TestGeneratePluginDocs-Markdown-relation_relation_table_a.md deleted file mode 100644 index 9ee22d1ba1..0000000000 --- a/plugins/source/testdata/TestGeneratePluginDocs-Markdown-relation_relation_table_a.md +++ /dev/null @@ -1,21 +0,0 @@ -# Table: relation_relation_table_a - -This table shows data for Relation Relation Table A. - -Description for relational table's relation - -The primary key for this table is **_cq_id**. - -## Relations - -This table depends on [relation_table](relation_table.md). - -## Columns - -| Name | Type | -| ------------- | ------------- | -|_cq_source_name|`utf8`| -|_cq_sync_time|`timestamp[us, tz=UTC]`| -|_cq_id (PK)|`uuid`| -|_cq_parent_id|`uuid`| -|string_col|`utf8`| diff --git a/plugins/source/testdata/TestGeneratePluginDocs-Markdown-relation_relation_table_b.md b/plugins/source/testdata/TestGeneratePluginDocs-Markdown-relation_relation_table_b.md deleted file mode 100644 index f6d68a71e1..0000000000 --- a/plugins/source/testdata/TestGeneratePluginDocs-Markdown-relation_relation_table_b.md +++ /dev/null @@ -1,21 +0,0 @@ -# Table: relation_relation_table_b - -This table shows data for Relation Relation Table B. - -Description for relational table's relation - -The primary key for this table is **_cq_id**. - -## Relations - -This table depends on [relation_table](relation_table.md). - -## Columns - -| Name | Type | -| ------------- | ------------- | -|_cq_source_name|`utf8`| -|_cq_sync_time|`timestamp[us, tz=UTC]`| -|_cq_id (PK)|`uuid`| -|_cq_parent_id|`uuid`| -|string_col|`utf8`| diff --git a/plugins/source/testdata/TestGeneratePluginDocs-Markdown-relation_table.md b/plugins/source/testdata/TestGeneratePluginDocs-Markdown-relation_table.md deleted file mode 100644 index 95c4125aa7..0000000000 --- a/plugins/source/testdata/TestGeneratePluginDocs-Markdown-relation_table.md +++ /dev/null @@ -1,25 +0,0 @@ -# Table: relation_table - -This table shows data for Relation Table. - -Description for relational table - -The primary key for this table is **_cq_id**. - -## Relations - -This table depends on [test_table](test_table.md). - -The following tables depend on relation_table: - - [relation_relation_table_a](relation_relation_table_a.md) - - [relation_relation_table_b](relation_relation_table_b.md) - -## Columns - -| Name | Type | -| ------------- | ------------- | -|_cq_source_name|`utf8`| -|_cq_sync_time|`timestamp[us, tz=UTC]`| -|_cq_id (PK)|`uuid`| -|_cq_parent_id|`uuid`| -|string_col|`utf8`| diff --git a/plugins/source/testdata/TestGeneratePluginDocs-Markdown-test_table.md b/plugins/source/testdata/TestGeneratePluginDocs-Markdown-test_table.md deleted file mode 100644 index cdd1df3317..0000000000 --- a/plugins/source/testdata/TestGeneratePluginDocs-Markdown-test_table.md +++ /dev/null @@ -1,29 +0,0 @@ -# Table: test_table - -This table shows data for Test Table. - -Description for test table - -The composite primary key for this table is (**id_col**, **id_col2**). - -## Relations - -The following tables depend on test_table: - - [relation_table](relation_table.md) - - [relation_table2](relation_table2.md) - -## Columns - -| Name | Type | -| ------------- | ------------- | -|_cq_source_name|`utf8`| -|_cq_sync_time|`timestamp[us, tz=UTC]`| -|_cq_id|`uuid`| -|_cq_parent_id|`uuid`| -|int_col|`int64`| -|id_col (PK)|`int64`| -|id_col2 (PK)|`int64`| -|json_col|`json`| -|list_col|`list`| -|map_col|`map`| -|struct_col|`struct`| diff --git a/plugins/source/testing.go b/plugins/source/testing.go deleted file mode 100644 index 161778bca9..0000000000 --- a/plugins/source/testing.go +++ /dev/null @@ -1,141 +0,0 @@ -package source - -import ( - "context" - "testing" - "time" - - "github.com/cloudquery/plugin-pb-go/specs" - "github.com/cloudquery/plugin-sdk/v3/schema" -) - -type Validator func(t *testing.T, plugin *Plugin, resources []*schema.Resource) - -func TestPluginSync(t *testing.T, plugin *Plugin, spec specs.Source, opts ...TestPluginOption) { - t.Helper() - - o := &testPluginOptions{ - parallel: true, - validators: []Validator{validatePlugin}, - } - for _, opt := range opts { - opt(o) - } - if o.parallel { - t.Parallel() - } - - resourcesChannel := make(chan *schema.Resource) - var syncErr error - - if err := plugin.Init(context.Background(), spec); err != nil { - t.Fatal(err) - } - - go func() { - defer close(resourcesChannel) - syncErr = plugin.Sync(context.Background(), time.Now(), resourcesChannel) - }() - - syncedResources := make([]*schema.Resource, 0) - for resource := range resourcesChannel { - syncedResources = append(syncedResources, resource) - } - if syncErr != nil { - t.Fatal(syncErr) - } - for _, validator := range o.validators { - validator(t, plugin, syncedResources) - } -} - -type TestPluginOption func(*testPluginOptions) - -func WithTestPluginNoParallel() TestPluginOption { - return func(f *testPluginOptions) { - f.parallel = false - } -} - -func WithTestPluginAdditionalValidators(v Validator) TestPluginOption { - return func(f *testPluginOptions) { - f.validators = append(f.validators, v) - } -} - -type testPluginOptions struct { - parallel bool - validators []Validator -} - -func getTableResources(t *testing.T, table *schema.Table, resources []*schema.Resource) []*schema.Resource { - t.Helper() - - tableResources := make([]*schema.Resource, 0) - - for _, resource := range resources { - if resource.Table.Name == table.Name { - tableResources = append(tableResources, resource) - } - } - - return tableResources -} - -func validateTable(t *testing.T, table *schema.Table, resources []*schema.Resource) { - t.Helper() - tableResources := getTableResources(t, table, resources) - if len(tableResources) == 0 { - t.Errorf("Expected table %s to be synced but it was not found", table.Name) - return - } - validateResources(t, tableResources) -} - -func validatePlugin(t *testing.T, plugin *Plugin, resources []*schema.Resource) { - t.Helper() - tables := extractTables(plugin.tables) - for _, table := range tables { - validateTable(t, table, resources) - } -} - -func extractTables(tables schema.Tables) []*schema.Table { - result := make([]*schema.Table, 0) - for _, table := range tables { - result = append(result, table) - result = append(result, extractTables(table.Relations)...) - } - return result -} - -// Validates that every column has at least one non-nil value. -// Also does some additional validations. -func validateResources(t *testing.T, resources []*schema.Resource) { - t.Helper() - - table := resources[0].Table - - // A set of column-names that have values in at least one of the resources. - columnsWithValues := make([]bool, len(table.Columns)) - - for _, resource := range resources { - for i, value := range resource.GetValues() { - if value == nil { - continue - } - if value.IsValid() { - columnsWithValues[i] = true - } - } - } - - // Make sure every column has at least one value. - for i, hasValue := range columnsWithValues { - col := table.Columns[i] - emptyExpected := col.Name == "_cq_parent_id" && table.Parent == nil - if !hasValue && !emptyExpected && !col.IgnoreInTests { - t.Errorf("table: %s column %s has no values", table.Name, table.Columns[i].Name) - } - } -} diff --git a/plugins/source/validate.go b/plugins/source/validate.go deleted file mode 100644 index 835b798c7e..0000000000 --- a/plugins/source/validate.go +++ /dev/null @@ -1,25 +0,0 @@ -package source - -import ( - "fmt" -) - -func (p *Plugin) validate() error { - if err := p.tables.ValidateDuplicateColumns(); err != nil { - return fmt.Errorf("found duplicate columns in source plugin: %s: %w", p.name, err) - } - - if err := p.tables.ValidateDuplicateTables(); err != nil { - return fmt.Errorf("found duplicate tables in source plugin: %s: %w", p.name, err) - } - - if err := p.tables.ValidateTableNames(); err != nil { - return fmt.Errorf("found table with invalid name in source plugin: %s: %w", p.name, err) - } - - if err := p.tables.ValidateColumnNames(); err != nil { - return fmt.Errorf("found column with invalid name in source plugin: %s: %w", p.name, err) - } - - return nil -} diff --git a/scalar/inet.go b/scalar/inet.go index f693a479e0..3d6163cfc7 100644 --- a/scalar/inet.go +++ b/scalar/inet.go @@ -7,7 +7,7 @@ import ( "strings" "github.com/apache/arrow/go/v13/arrow" - "github.com/cloudquery/plugin-sdk/v3/types" + "github.com/cloudquery/plugin-sdk/v4/types" ) type Inet struct { diff --git a/scalar/json.go b/scalar/json.go index ed6761351b..c0c5fceea3 100644 --- a/scalar/json.go +++ b/scalar/json.go @@ -6,7 +6,7 @@ import ( "reflect" "github.com/apache/arrow/go/v13/arrow" - "github.com/cloudquery/plugin-sdk/v3/types" + "github.com/cloudquery/plugin-sdk/v4/types" ) type JSON struct { diff --git a/scalar/mac.go b/scalar/mac.go index cef4ac27f6..5350a64bee 100644 --- a/scalar/mac.go +++ b/scalar/mac.go @@ -4,7 +4,7 @@ import ( "net" "github.com/apache/arrow/go/v13/arrow" - "github.com/cloudquery/plugin-sdk/v3/types" + "github.com/cloudquery/plugin-sdk/v4/types" ) type Mac struct { diff --git a/scalar/scalar.go b/scalar/scalar.go index 5f471e0258..d80c1a2e5e 100644 --- a/scalar/scalar.go +++ b/scalar/scalar.go @@ -5,9 +5,8 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/array" - "github.com/apache/arrow/go/v13/arrow/float16" - "github.com/cloudquery/plugin-sdk/v3/types" - "golang.org/x/exp/maps" + "github.com/apache/arrow/go/v13/arrow/memory" + "github.com/cloudquery/plugin-sdk/v4/types" ) // Scalar represents a single value of a specific DataType as opposed to @@ -33,7 +32,12 @@ type Scalar interface { type Vector []Scalar -const nullValueStr = array.NullValueStr +func (v Vector) ToArrowRecord(sc *arrow.Schema) arrow.Record { + bldr := array.NewRecordBuilder(memory.DefaultAllocator, sc) + AppendToRecordBuilder(bldr, v) + rec := bldr.NewRecord() + return rec +} func (v Vector) Equal(r Vector) bool { if len(v) != len(r) { diff --git a/scalar/uuid.go b/scalar/uuid.go index f8a79c94b0..dfae523cbd 100644 --- a/scalar/uuid.go +++ b/scalar/uuid.go @@ -5,7 +5,7 @@ import ( "fmt" "github.com/apache/arrow/go/v13/arrow" - "github.com/cloudquery/plugin-sdk/v3/types" + "github.com/cloudquery/plugin-sdk/v4/types" "github.com/google/uuid" ) diff --git a/schema/meta.go b/schema/meta.go index bd739bf80f..bd5ca2de7e 100644 --- a/schema/meta.go +++ b/schema/meta.go @@ -4,8 +4,8 @@ import ( "context" "github.com/apache/arrow/go/v13/arrow" - "github.com/cloudquery/plugin-sdk/v3/scalar" - "github.com/cloudquery/plugin-sdk/v3/types" + "github.com/cloudquery/plugin-sdk/v4/scalar" + "github.com/cloudquery/plugin-sdk/v4/types" ) type ClientMeta interface { diff --git a/schema/resource.go b/schema/resource.go index fbbaf6667b..e9d1f07da3 100644 --- a/schema/resource.go +++ b/schema/resource.go @@ -4,7 +4,7 @@ import ( "crypto/sha256" "fmt" - "github.com/cloudquery/plugin-sdk/v3/scalar" + "github.com/cloudquery/plugin-sdk/v4/scalar" "github.com/google/uuid" "golang.org/x/exp/slices" ) diff --git a/schema/table.go b/schema/table.go index ed774f3b39..4475170104 100644 --- a/schema/table.go +++ b/schema/table.go @@ -6,7 +6,7 @@ import ( "regexp" "github.com/apache/arrow/go/v13/arrow" - "github.com/cloudquery/plugin-sdk/v3/internal/glob" + "github.com/cloudquery/plugin-sdk/v4/internal/glob" "golang.org/x/exp/slices" ) diff --git a/schema/testdata.go b/schema/testdata.go index 5570c6a090..c592ddc40a 100644 --- a/schema/testdata.go +++ b/schema/testdata.go @@ -12,7 +12,7 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/array" "github.com/apache/arrow/go/v13/arrow/memory" - "github.com/cloudquery/plugin-sdk/v3/types" + "github.com/cloudquery/plugin-sdk/v4/types" "github.com/google/uuid" "golang.org/x/exp/rand" "golang.org/x/exp/slices" @@ -31,6 +31,7 @@ type TestSourceOptions struct { SkipTimes bool // time of day types SkipTimestamps bool // timestamp types. Microsecond timestamp is always be included, regardless of this setting. TimePrecision time.Duration + SkipDecimals bool } // TestSourceColumns returns columns for all Arrow types and composites thereof. TestSourceOptions controls diff --git a/serve/destination.go b/serve/destination.go deleted file mode 100644 index cba93b90a5..0000000000 --- a/serve/destination.go +++ /dev/null @@ -1,209 +0,0 @@ -package serve - -import ( - "fmt" - "net" - "os" - "os/signal" - "strings" - "sync" - "syscall" - - pbv0 "github.com/cloudquery/plugin-pb-go/pb/destination/v0" - pbv1 "github.com/cloudquery/plugin-pb-go/pb/destination/v1" - pbdiscoveryv0 "github.com/cloudquery/plugin-pb-go/pb/discovery/v0" - servers "github.com/cloudquery/plugin-sdk/v3/internal/servers/destination/v0" - serversv1 "github.com/cloudquery/plugin-sdk/v3/internal/servers/destination/v1" - discoveryServerV0 "github.com/cloudquery/plugin-sdk/v3/internal/servers/discovery/v0" - "github.com/cloudquery/plugin-sdk/v3/plugins/destination" - "github.com/cloudquery/plugin-sdk/v3/types" - "github.com/getsentry/sentry-go" - grpczerolog "github.com/grpc-ecosystem/go-grpc-middleware/providers/zerolog/v2" - "github.com/grpc-ecosystem/go-grpc-middleware/v2/interceptors/logging" - "github.com/rs/zerolog" - "github.com/rs/zerolog/log" - "github.com/spf13/cobra" - "github.com/thoas/go-funk" - "google.golang.org/grpc" - "google.golang.org/grpc/test/bufconn" -) - -type destinationServe struct { - plugin *destination.Plugin - sentryDSN string -} - -type DestinationOption func(*destinationServe) - -func WithDestinationSentryDSN(dsn string) DestinationOption { - return func(s *destinationServe) { - s.sentryDSN = dsn - } -} - -var testDestinationListener *bufconn.Listener -var testDestinationListenerLock sync.Mutex - -const serveDestinationShort = `Start destination plugin server` - -func Destination(plugin *destination.Plugin, opts ...DestinationOption) { - s := &destinationServe{ - plugin: plugin, - } - for _, opt := range opts { - opt(s) - } - if err := newCmdDestinationRoot(s).Execute(); err != nil { - sentry.CaptureMessage(err.Error()) - fmt.Println(err) - os.Exit(1) - } -} - -// nolint:dupl -func newCmdDestinationServe(serve *destinationServe) *cobra.Command { - var address string - var network string - var noSentry bool - logLevel := newEnum([]string{"trace", "debug", "info", "warn", "error"}, "info") - logFormat := newEnum([]string{"text", "json"}, "text") - telemetryLevel := newEnum([]string{"none", "errors", "stats", "all"}, "all") - err := telemetryLevel.Set(getEnvOrDefault("CQ_TELEMETRY_LEVEL", telemetryLevel.Value)) - if err != nil { - fmt.Fprintf(os.Stderr, "failed to set telemetry level: "+err.Error()) - os.Exit(1) - } - - cmd := &cobra.Command{ - Use: "serve", - Short: serveDestinationShort, - Long: serveDestinationShort, - Args: cobra.NoArgs, - RunE: func(cmd *cobra.Command, args []string) error { - zerologLevel, err := zerolog.ParseLevel(logLevel.String()) - if err != nil { - return err - } - var logger zerolog.Logger - if logFormat.String() == "json" { - logger = zerolog.New(os.Stdout).Level(zerologLevel) - } else { - logger = log.Output(zerolog.ConsoleWriter{Out: os.Stdout}).Level(zerologLevel) - } - - var listener net.Listener - if network == "test" { - testDestinationListenerLock.Lock() - listener = bufconn.Listen(testBufSize) - testDestinationListener = listener.(*bufconn.Listener) - testDestinationListenerLock.Unlock() - } else { - listener, err = net.Listen(network, address) - if err != nil { - return fmt.Errorf("failed to listen %s:%s: %w", network, address, err) - } - } - // See logging pattern https://github.com/grpc-ecosystem/go-grpc-middleware/blob/v2/providers/zerolog/examples_test.go - s := grpc.NewServer( - grpc.ChainUnaryInterceptor( - logging.UnaryServerInterceptor(grpczerolog.InterceptorLogger(logger)), - ), - grpc.ChainStreamInterceptor( - logging.StreamServerInterceptor(grpczerolog.InterceptorLogger(logger)), - ), - grpc.MaxRecvMsgSize(MaxMsgSize), - grpc.MaxSendMsgSize(MaxMsgSize), - ) - pbv0.RegisterDestinationServer(s, &servers.Server{ - Plugin: serve.plugin, - Logger: logger, - }) - pbv1.RegisterDestinationServer(s, &serversv1.Server{ - Plugin: serve.plugin, - Logger: logger, - }) - pbdiscoveryv0.RegisterDiscoveryServer(s, &discoveryServerV0.Server{ - Versions: []string{"v0", "v1"}, - }) - version := serve.plugin.Version() - - if serve.sentryDSN != "" && !strings.EqualFold(version, "development") && !noSentry { - err = sentry.Init(sentry.ClientOptions{ - Dsn: serve.sentryDSN, - Debug: false, - AttachStacktrace: false, - Release: version, - Transport: sentry.NewHTTPSyncTransport(), - ServerName: "oss", // set to "oss" on purpose to avoid sending any identifying information - // https://docs.sentry.io/platforms/go/configuration/options/#removing-default-integrations - Integrations: func(integrations []sentry.Integration) []sentry.Integration { - var filteredIntegrations []sentry.Integration - for _, integration := range integrations { - if integration.Name() == "Modules" { - continue - } - filteredIntegrations = append(filteredIntegrations, integration) - } - return filteredIntegrations - }, - }) - if err != nil { - log.Error().Err(err).Msg("Error initializing sentry") - } - } - - if err := types.RegisterAllExtensions(); err != nil { - return err - } - defer func() { - if err := types.UnregisterAllExtensions(); err != nil { - logger.Error().Err(err).Msg("Failed to unregister extensions") - } - }() - - ctx := cmd.Context() - c := make(chan os.Signal, 1) - signal.Notify(c, os.Interrupt, syscall.SIGTERM) - defer func() { - signal.Stop(c) - }() - - go func() { - select { - case sig := <-c: - logger.Info().Str("address", listener.Addr().String()).Str("signal", sig.String()).Msg("Got stop signal. Destination plugin server shutting down") - s.Stop() - case <-ctx.Done(): - logger.Info().Str("address", listener.Addr().String()).Msg("Context cancelled. Destination plugin server shutting down") - s.Stop() - } - }() - - logger.Info().Str("address", listener.Addr().String()).Msg("Destination plugin server listening") - if err := s.Serve(listener); err != nil { - return fmt.Errorf("failed to serve: %w", err) - } - return nil - }, - } - cmd.Flags().StringVar(&address, "address", "localhost:7777", "address to serve on. can be tcp: `localhost:7777` or unix socket: `/tmp/plugin.rpc.sock`") - cmd.Flags().StringVar(&network, "network", "tcp", `the network must be "tcp", "tcp4", "tcp6", "unix" or "unixpacket"`) - cmd.Flags().Var(logLevel, "log-level", fmt.Sprintf("log level. one of: %s", strings.Join(logLevel.Allowed, ","))) - cmd.Flags().Var(logFormat, "log-format", fmt.Sprintf("log format. one of: %s", strings.Join(logFormat.Allowed, ","))) - cmd.Flags().BoolVar(&noSentry, "no-sentry", false, "disable sentry") - sendErrors := funk.ContainsString([]string{"all", "errors"}, telemetryLevel.String()) - if !sendErrors { - noSentry = true - } - return cmd -} - -func newCmdDestinationRoot(serve *destinationServe) *cobra.Command { - cmd := &cobra.Command{ - Use: fmt.Sprintf("%s ", serve.plugin.Name()), - } - cmd.AddCommand(newCmdDestinationServe(serve)) - cmd.CompletionOptions.DisableDefaultCmd = true - cmd.Version = serve.plugin.Version() - return cmd -} diff --git a/serve/destination_v0_test.go b/serve/destination_v0_test.go index 84c4b0e272..e59bcbfd88 100644 --- a/serve/destination_v0_test.go +++ b/serve/destination_v0_test.go @@ -16,27 +16,27 @@ import ( "github.com/cloudquery/plugin-pb-go/specs" schemav2 "github.com/cloudquery/plugin-sdk/v2/schema" "github.com/cloudquery/plugin-sdk/v2/testdata" - "github.com/cloudquery/plugin-sdk/v3/internal/deprecated" - "github.com/cloudquery/plugin-sdk/v3/internal/memdb" - serversDestination "github.com/cloudquery/plugin-sdk/v3/internal/servers/destination/v0" - "github.com/cloudquery/plugin-sdk/v3/plugins/destination" + "github.com/cloudquery/plugin-sdk/v4/internal/deprecated" + "github.com/cloudquery/plugin-sdk/v4/internal/memdb" + serversDestination "github.com/cloudquery/plugin-sdk/v4/internal/servers/destination/v0" + "github.com/cloudquery/plugin-sdk/v4/plugin" "google.golang.org/grpc" "google.golang.org/grpc/credentials/insecure" "google.golang.org/protobuf/types/known/timestamppb" ) func bufDestinationDialer(context.Context, string) (net.Conn, error) { - testDestinationListenerLock.Lock() - defer testDestinationListenerLock.Unlock() - return testDestinationListener.Dial() + testPluginListenerLock.Lock() + defer testPluginListenerLock.Unlock() + return testPluginListener.Dial() } func TestDestination(t *testing.T) { - plugin := destination.NewPlugin("testDestinationPlugin", "development", memdb.NewClient) - s := &destinationServe{ + plugin := plugin.NewPlugin("testDestinationPlugin", "development", memdb.NewClient) + s := &pluginServe{ plugin: plugin, } - cmd := newCmdDestinationRoot(s) + cmd := newCmdPluginRoot(s) cmd.SetArgs([]string{"serve", "--network", "test"}) ctx := context.Background() ctx, cancel := context.WithCancel(ctx) @@ -54,12 +54,12 @@ func TestDestination(t *testing.T) { // wait for the server to start for { - testDestinationListenerLock.Lock() - if testDestinationListener != nil { - testDestinationListenerLock.Unlock() + testPluginListenerLock.Lock() + if testPluginListener != nil { + testPluginListenerLock.Unlock() break } - testDestinationListenerLock.Unlock() + testPluginListenerLock.Unlock() t.Log("waiting for grpc server to start") time.Sleep(time.Millisecond * 200) } @@ -163,8 +163,8 @@ func TestDestination(t *testing.T) { for resource := range readCh { totalResources++ if !array.RecordEqual(destRecord, resource) { - diff := destination.RecordDiff(destRecord, resource) - t.Fatalf("expected %v but got %v. Diff: %v", destRecord, resource, diff) + // diff := destination.RecordDiff(destRecord, resource) + t.Fatalf("expected %v but got %v", destRecord, resource) } } if totalResources != 1 { diff --git a/serve/destination_v1_test.go b/serve/destination_v1_test.go index e5172106ad..0f55b90694 100644 --- a/serve/destination_v1_test.go +++ b/serve/destination_v1_test.go @@ -13,20 +13,21 @@ import ( "github.com/apache/arrow/go/v13/arrow/ipc" pb "github.com/cloudquery/plugin-pb-go/pb/destination/v1" "github.com/cloudquery/plugin-pb-go/specs" - "github.com/cloudquery/plugin-sdk/v3/internal/memdb" - "github.com/cloudquery/plugin-sdk/v3/plugins/destination" - "github.com/cloudquery/plugin-sdk/v3/schema" + "github.com/cloudquery/plugin-sdk/v4/internal/memdb" + "github.com/cloudquery/plugin-sdk/v4/plugin" + "github.com/cloudquery/plugin-sdk/v4/plugins/destination" + "github.com/cloudquery/plugin-sdk/v4/schema" "google.golang.org/grpc" "google.golang.org/grpc/credentials/insecure" "google.golang.org/protobuf/types/known/timestamppb" ) func TestDestinationV1(t *testing.T) { - plugin := destination.NewPlugin("testDestinationPlugin", "development", memdb.NewClient) - s := &destinationServe{ + plugin := plugin.NewPlugin("testDestinationPlugin", "development", memdb.NewClient) + s := &pluginServe{ plugin: plugin, } - cmd := newCmdDestinationRoot(s) + cmd := newCmdPluginRoot(s) cmd.SetArgs([]string{"serve", "--network", "test"}) ctx := context.Background() ctx, cancel := context.WithCancel(ctx) @@ -44,12 +45,12 @@ func TestDestinationV1(t *testing.T) { // wait for the server to start for { - testDestinationListenerLock.Lock() - if testDestinationListener != nil { - testDestinationListenerLock.Unlock() + testPluginListenerLock.Lock() + if testPluginListener != nil { + testPluginListenerLock.Unlock() break } - testDestinationListenerLock.Unlock() + testPluginListenerLock.Unlock() t.Log("waiting for grpc server to start") time.Sleep(time.Millisecond * 200) } diff --git a/serve/plugin.go b/serve/plugin.go index b37be8513c..2fe9774262 100644 --- a/serve/plugin.go +++ b/serve/plugin.go @@ -9,13 +9,17 @@ import ( "sync" "syscall" - "github.com/cloudquery/plugin-sdk/v3/plugin" + "github.com/cloudquery/plugin-sdk/v4/plugin" + pbDestinationV0 "github.com/cloudquery/plugin-pb-go/pb/destination/v0" + pbDestinationV1 "github.com/cloudquery/plugin-pb-go/pb/destination/v1" pbdiscoveryv0 "github.com/cloudquery/plugin-pb-go/pb/discovery/v0" - pbv0 "github.com/cloudquery/plugin-pb-go/pb/plugin/v0" - discoveryServerV0 "github.com/cloudquery/plugin-sdk/v3/internal/servers/discovery/v0" + pbv3 "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" + discoveryServerV0 "github.com/cloudquery/plugin-sdk/v4/internal/servers/discovery/v0" - serversv0 "github.com/cloudquery/plugin-sdk/v3/internal/servers/plugin/v0" + serverDestinationV0 "github.com/cloudquery/plugin-sdk/v4/internal/servers/destination/v0" + serverDestinationV1 "github.com/cloudquery/plugin-sdk/v4/internal/servers/destination/v1" + serversv3 "github.com/cloudquery/plugin-sdk/v4/internal/servers/plugin/v3" "github.com/getsentry/sentry-go" grpczerolog "github.com/grpc-ecosystem/go-grpc-middleware/providers/zerolog/v2" "github.com/grpc-ecosystem/go-grpc-middleware/v2/interceptors/logging" @@ -30,6 +34,7 @@ import ( type pluginServe struct { plugin *plugin.Plugin + destinationV0V1Server bool sentryDSN string } @@ -41,6 +46,14 @@ func WithPluginSentryDSN(dsn string) PluginOption { } } +// WithDestinationV0V1Server is used to include destination v0 and v1 server to work +// with older sources +func WithDestinationV0V1Server() PluginOption { + return func(s *pluginServe) { + s.destinationV0V1Server = true + } +} + // lis used for unit testing grpc server and client var testPluginListener *bufconn.Listener var testPluginListenerLock sync.Mutex @@ -77,8 +90,8 @@ func newCmdPluginServe(serve *pluginServe) *cobra.Command { cmd := &cobra.Command{ Use: "serve", - Short: serveSourceShort, - Long: serveSourceShort, + Short: servePluginShort, + Long: servePluginShort, Args: cobra.NoArgs, RunE: func(cmd *cobra.Command, args []string) error { zerologLevel, err := zerolog.ParseLevel(logLevel.String()) @@ -95,10 +108,10 @@ func newCmdPluginServe(serve *pluginServe) *cobra.Command { // opts.Plugin.Logger = logger var listener net.Listener if network == "test" { - testSourceListenerLock.Lock() + testPluginListenerLock.Lock() listener = bufconn.Listen(testBufSize) - testSourceListener = listener.(*bufconn.Listener) - testSourceListenerLock.Unlock() + testPluginListener = listener.(*bufconn.Listener) + testPluginListenerLock.Unlock() } else { listener, err = net.Listen(network, address) if err != nil { @@ -120,12 +133,22 @@ func newCmdPluginServe(serve *pluginServe) *cobra.Command { grpc.MaxSendMsgSize(MaxMsgSize), ) serve.plugin.SetLogger(logger) - pbv0.RegisterPluginServer(s, &serversv0.Server{ + pbv3.RegisterPluginServer(s, &serversv3.Server{ Plugin: serve.plugin, Logger: logger, }) + if serve.destinationV0V1Server { + pbDestinationV1.RegisterDestinationServer(s, &serverDestinationV1.Server{ + Plugin: serve.plugin, + Logger: logger, + }) + pbDestinationV0.RegisterDestinationServer(s, &serverDestinationV0.Server{ + Plugin: serve.plugin, + Logger: logger, + }) + } pbdiscoveryv0.RegisterDiscoveryServer(s, &discoveryServerV0.Server{ - Versions: []string{"v2"}, + Versions: []string{"v0", "v1", "v2", "v3"}, }) version := serve.plugin.Version() @@ -211,11 +234,11 @@ func newCmdPluginDoc(serve *pluginServe) *cobra.Command { format := newEnum([]string{"json", "markdown"}, "markdown") cmd := &cobra.Command{ Use: "doc ", - Short: sourceDocShort, - Long: sourceDocLong, + Short: pluginDocShort, + Long: pluginDocLong, Args: cobra.ExactArgs(1), RunE: func(cmd *cobra.Command, args []string) error { - pbFormat := pbv0.GenDocs_FORMAT(pbv0.GenDocs_FORMAT_value[format.Value]) + pbFormat := pbv3.GenDocs_FORMAT(pbv3.GenDocs_FORMAT_value[format.Value]) return serve.plugin.GeneratePluginDocs(serve.plugin.StaticTables(), args[0], pbFormat) }, } diff --git a/serve/plugin_test.go b/serve/plugin_test.go index 8a541611e9..9aeb864a4c 100644 --- a/serve/plugin_test.go +++ b/serve/plugin_test.go @@ -12,10 +12,10 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/ipc" - pb "github.com/cloudquery/plugin-pb-go/pb/source/v2" - "github.com/cloudquery/plugin-pb-go/specs" - "github.com/cloudquery/plugin-sdk/v3/plugins/source" - "github.com/cloudquery/plugin-sdk/v3/schema" + pb "github.com/cloudquery/plugin-pb-go/pb/plugin/v0" + "github.com/cloudquery/plugin-sdk/v4/plugin" + "github.com/cloudquery/plugin-sdk/v4/plugins/source" + "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/rs/zerolog" "google.golang.org/grpc" "google.golang.org/grpc/credentials/insecure" @@ -25,7 +25,11 @@ type TestSourcePluginSpec struct { Accounts []string `json:"accounts,omitempty" yaml:"accounts,omitempty"` } -type testExecutionClient struct{} +type testExecutionClient struct { + plugin.UnimplementedSync + plugin.UnimplementedWriter + plugin.UnimplementedRead +} var _ schema.ClientMeta = &testExecutionClient{} @@ -53,24 +57,28 @@ func (*testExecutionClient) ID() string { return "testExecutionClient" } -func newTestExecutionClient(context.Context, zerolog.Logger, specs.Source, source.Options) (schema.ClientMeta, error) { +func (*testExecutionClient) Close(ctx context.Context) error { + return nil +} + +func newTestExecutionClient(context.Context, zerolog.Logger, pb.Spec) (plugin.Client, error) { return &testExecutionClient{}, nil } func bufSourceDialer(context.Context, string) (net.Conn, error) { - testSourceListenerLock.Lock() - defer testSourceListenerLock.Unlock() - return testSourceListener.Dial() + testPluginListenerLock.Lock() + defer testPluginListenerLock.Unlock() + return testPluginListener.Dial() } func TestSourceSuccess(t *testing.T) { - plugin := source.NewPlugin( + plugin := plugin.NewPlugin( "testPlugin", "v1.0.0", - []*schema.Table{testTable("test_table"), testTable("test_table2")}, - newTestExecutionClient) + newTestExecutionClient, + plugin.WithStaticTables([]*schema.Table{testTable("test_table"), testTable("test_table2")})) - cmd := newCmdSourceRoot(&sourceServe{ + cmd := newCmdPluginRoot(&pluginServe{ plugin: plugin, }) cmd.SetArgs([]string{"serve", "--network", "test"}) @@ -88,12 +96,12 @@ func TestSourceSuccess(t *testing.T) { wg.Wait() }() for { - testSourceListenerLock.Lock() - if testSourceListener != nil { - testSourceListenerLock.Unlock() + testPluginListenerLock.Lock() + if testPluginListener != nil { + testPluginListenerLock.Unlock() break } - testSourceListenerLock.Unlock() + testPluginListenerLock.Unlock() t.Log("waiting for grpc server to start") time.Sleep(time.Millisecond * 200) } @@ -103,7 +111,7 @@ func TestSourceSuccess(t *testing.T) { if err != nil { t.Fatalf("Failed to dial bufnet: %v", err) } - c := pb.NewSourceClient(conn) + c := pb.NewPluginClient(conn) getNameRes, err := c.GetName(ctx, &pb.GetName_Request{}) if err != nil { @@ -121,21 +129,17 @@ func TestSourceSuccess(t *testing.T) { t.Fatalf("Expected version to be v1.0.0 but got %s", getVersionResponse.Version) } - spec := specs.Source{ - Name: "testSourcePlugin", - Version: "v1.0.0", - Path: "cloudquery/testSourcePlugin", - Registry: specs.RegistryGithub, - Tables: []string{"test_table"}, - Spec: TestSourcePluginSpec{Accounts: []string{"cloudquery/plugin-sdk"}}, - Destinations: []string{"test"}, - } - specMarshaled, err := json.Marshal(spec) - if err != nil { - t.Fatalf("Failed to marshal spec: %v", err) + spec := pb.Spec{ + Name: "testSourcePlugin", + Version: "v1.0.0", + Path: "cloudquery/testSourcePlugin", + SyncSpec: &pb.SyncSpec{ + Tables: []string{"test_table"}, + Destinations: []string{"test"}, + }, } - getTablesRes, err := c.GetTables(ctx, &pb.GetTables_Request{}) + getTablesRes, err := c.GetStaticTables(ctx, &pb.GetStaticTables_Request{}) if err != nil { t.Fatal(err) } @@ -148,7 +152,7 @@ func TestSourceSuccess(t *testing.T) { if len(tables) != 2 { t.Fatalf("Expected 2 tables but got %d", len(tables)) } - if _, err := c.Init(ctx, &pb.Init_Request{Spec: specMarshaled}); err != nil { + if _, err := c.Init(ctx, &pb.Init_Request{Spec: &spec}); err != nil { t.Fatal(err) } diff --git a/serve/source.go b/serve/source.go deleted file mode 100644 index ae57c83d07..0000000000 --- a/serve/source.go +++ /dev/null @@ -1,233 +0,0 @@ -package serve - -import ( - "fmt" - "net" - "os" - "os/signal" - "strings" - "sync" - "syscall" - - pbdiscoveryv0 "github.com/cloudquery/plugin-pb-go/pb/discovery/v0" - pbv2 "github.com/cloudquery/plugin-pb-go/pb/source/v2" - discoveryServerV0 "github.com/cloudquery/plugin-sdk/v3/internal/servers/discovery/v0" - - serversv2 "github.com/cloudquery/plugin-sdk/v3/internal/servers/source/v2" - "github.com/cloudquery/plugin-sdk/v3/plugins/source" - "github.com/getsentry/sentry-go" - grpczerolog "github.com/grpc-ecosystem/go-grpc-middleware/providers/zerolog/v2" - "github.com/grpc-ecosystem/go-grpc-middleware/v2/interceptors/logging" - "github.com/rs/zerolog" - "github.com/rs/zerolog/log" - "github.com/spf13/cobra" - "github.com/thoas/go-funk" - "golang.org/x/net/netutil" - "google.golang.org/grpc" - "google.golang.org/grpc/test/bufconn" -) - -type sourceServe struct { - plugin *source.Plugin - sentryDSN string -} - -type SourceOption func(*sourceServe) - -func WithSourceSentryDSN(dsn string) SourceOption { - return func(s *sourceServe) { - s.sentryDSN = dsn - } -} - -// lis used for unit testing grpc server and client -var testSourceListener *bufconn.Listener -var testSourceListenerLock sync.Mutex - -const serveSourceShort = `Start source plugin server` - -func Source(plugin *source.Plugin, opts ...SourceOption) { - s := &sourceServe{ - plugin: plugin, - } - for _, opt := range opts { - opt(s) - } - if err := newCmdSourceRoot(s).Execute(); err != nil { - sentry.CaptureMessage(err.Error()) - fmt.Println(err) - os.Exit(1) - } -} - -// nolint:dupl -func newCmdSourceServe(serve *sourceServe) *cobra.Command { - var address string - var network string - var noSentry bool - logLevel := newEnum([]string{"trace", "debug", "info", "warn", "error"}, "info") - logFormat := newEnum([]string{"text", "json"}, "text") - telemetryLevel := newEnum([]string{"none", "errors", "stats", "all"}, "all") - err := telemetryLevel.Set(getEnvOrDefault("CQ_TELEMETRY_LEVEL", telemetryLevel.Value)) - if err != nil { - fmt.Fprintf(os.Stderr, "failed to set telemetry level: "+err.Error()) - os.Exit(1) - } - - cmd := &cobra.Command{ - Use: "serve", - Short: serveSourceShort, - Long: serveSourceShort, - Args: cobra.NoArgs, - RunE: func(cmd *cobra.Command, args []string) error { - zerologLevel, err := zerolog.ParseLevel(logLevel.String()) - if err != nil { - return err - } - var logger zerolog.Logger - if logFormat.String() == "json" { - logger = zerolog.New(os.Stdout).Level(zerologLevel) - } else { - logger = log.Output(zerolog.ConsoleWriter{Out: os.Stdout}).Level(zerologLevel) - } - - // opts.Plugin.Logger = logger - var listener net.Listener - if network == "test" { - testSourceListenerLock.Lock() - listener = bufconn.Listen(testBufSize) - testSourceListener = listener.(*bufconn.Listener) - testSourceListenerLock.Unlock() - } else { - listener, err = net.Listen(network, address) - if err != nil { - return fmt.Errorf("failed to listen %s:%s: %w", network, address, err) - } - } - // source plugins can only accept one connection at a time - // unlike destination plugins that can accept multiple connections - limitListener := netutil.LimitListener(listener, 1) - // See logging pattern https://github.com/grpc-ecosystem/go-grpc-middleware/blob/v2/providers/zerolog/examples_test.go - s := grpc.NewServer( - grpc.ChainUnaryInterceptor( - logging.UnaryServerInterceptor(grpczerolog.InterceptorLogger(logger)), - ), - grpc.ChainStreamInterceptor( - logging.StreamServerInterceptor(grpczerolog.InterceptorLogger(logger)), - ), - grpc.MaxRecvMsgSize(MaxMsgSize), - grpc.MaxSendMsgSize(MaxMsgSize), - ) - serve.plugin.SetLogger(logger) - pbv2.RegisterSourceServer(s, &serversv2.Server{ - Plugin: serve.plugin, - Logger: logger, - }) - pbdiscoveryv0.RegisterDiscoveryServer(s, &discoveryServerV0.Server{ - Versions: []string{"v2"}, - }) - - version := serve.plugin.Version() - - if serve.sentryDSN != "" && !strings.EqualFold(version, "development") && !noSentry { - err = sentry.Init(sentry.ClientOptions{ - Dsn: serve.sentryDSN, - Debug: false, - AttachStacktrace: false, - Release: version, - Transport: sentry.NewHTTPSyncTransport(), - ServerName: "oss", // set to "oss" on purpose to avoid sending any identifying information - // https://docs.sentry.io/platforms/go/configuration/options/#removing-default-integrations - Integrations: func(integrations []sentry.Integration) []sentry.Integration { - var filteredIntegrations []sentry.Integration - for _, integration := range integrations { - if integration.Name() == "Modules" { - continue - } - filteredIntegrations = append(filteredIntegrations, integration) - } - return filteredIntegrations - }, - }) - if err != nil { - log.Error().Err(err).Msg("Error initializing sentry") - } - } - - ctx := cmd.Context() - c := make(chan os.Signal, 1) - signal.Notify(c, os.Interrupt, syscall.SIGTERM) - defer func() { - signal.Stop(c) - }() - - go func() { - select { - case sig := <-c: - logger.Info().Str("address", listener.Addr().String()).Str("signal", sig.String()).Msg("Got stop signal. Source plugin server shutting down") - s.Stop() - case <-ctx.Done(): - logger.Info().Str("address", listener.Addr().String()).Msg("Context cancelled. Source plugin server shutting down") - s.Stop() - } - }() - - logger.Info().Str("address", listener.Addr().String()).Msg("Source plugin server listening") - if err := s.Serve(limitListener); err != nil { - return fmt.Errorf("failed to serve: %w", err) - } - return nil - }, - } - cmd.Flags().StringVar(&address, "address", "localhost:7777", "address to serve on. can be tcp: `localhost:7777` or unix socket: `/tmp/plugin.rpc.sock`") - cmd.Flags().StringVar(&network, "network", "tcp", `the network must be "tcp", "tcp4", "tcp6", "unix" or "unixpacket"`) - cmd.Flags().Var(logLevel, "log-level", fmt.Sprintf("log level. one of: %s", strings.Join(logLevel.Allowed, ","))) - cmd.Flags().Var(logFormat, "log-format", fmt.Sprintf("log format. one of: %s", strings.Join(logFormat.Allowed, ","))) - cmd.Flags().BoolVar(&noSentry, "no-sentry", false, "disable sentry") - sendErrors := funk.ContainsString([]string{"all", "errors"}, telemetryLevel.String()) - if !sendErrors { - noSentry = true - } - - return cmd -} - -const ( - sourceDocShort = "Generate documentation for tables" - sourceDocLong = `Generate documentation for tables - -If format is markdown, a destination directory will be created (if necessary) containing markdown files. -Example: -doc ./output - -If format is JSON, a destination directory will be created (if necessary) with a single json file called __tables.json. -Example: -doc --format json . -` -) - -func newCmdSourceDoc(serve *sourceServe) *cobra.Command { - format := newEnum([]string{"json", "markdown"}, "markdown") - cmd := &cobra.Command{ - Use: "doc ", - Short: sourceDocShort, - Long: sourceDocLong, - Args: cobra.ExactArgs(1), - RunE: func(cmd *cobra.Command, args []string) error { - return serve.plugin.GeneratePluginDocs(args[0], format.Value) - }, - } - cmd.Flags().Var(format, "format", fmt.Sprintf("output format. one of: %s", strings.Join(format.Allowed, ","))) - return cmd -} - -func newCmdSourceRoot(serve *sourceServe) *cobra.Command { - cmd := &cobra.Command{ - Use: fmt.Sprintf("%s ", serve.plugin.Name()), - } - cmd.AddCommand(newCmdSourceServe(serve)) - cmd.AddCommand(newCmdSourceDoc(serve)) - cmd.CompletionOptions.DisableDefaultCmd = true - cmd.Version = serve.plugin.Version() - return cmd -} diff --git a/serve/source_v2_test.go b/serve/source_v2_test.go deleted file mode 100644 index 8a541611e9..0000000000 --- a/serve/source_v2_test.go +++ /dev/null @@ -1,238 +0,0 @@ -package serve - -import ( - "bytes" - "context" - "encoding/json" - "io" - "net" - "sync" - "testing" - "time" - - "github.com/apache/arrow/go/v13/arrow" - "github.com/apache/arrow/go/v13/arrow/ipc" - pb "github.com/cloudquery/plugin-pb-go/pb/source/v2" - "github.com/cloudquery/plugin-pb-go/specs" - "github.com/cloudquery/plugin-sdk/v3/plugins/source" - "github.com/cloudquery/plugin-sdk/v3/schema" - "github.com/rs/zerolog" - "google.golang.org/grpc" - "google.golang.org/grpc/credentials/insecure" -) - -type TestSourcePluginSpec struct { - Accounts []string `json:"accounts,omitempty" yaml:"accounts,omitempty"` -} - -type testExecutionClient struct{} - -var _ schema.ClientMeta = &testExecutionClient{} - -// var errTestExecutionClientErr = fmt.Errorf("error in newTestExecutionClientErr") - -func testTable(name string) *schema.Table { - return &schema.Table{ - Name: name, - Resolver: func(ctx context.Context, meta schema.ClientMeta, parent *schema.Resource, res chan<- any) error { - res <- map[string]any{ - "TestColumn": 3, - } - return nil - }, - Columns: []schema.Column{ - { - Name: "test_column", - Type: arrow.PrimitiveTypes.Int64, - }, - }, - } -} - -func (*testExecutionClient) ID() string { - return "testExecutionClient" -} - -func newTestExecutionClient(context.Context, zerolog.Logger, specs.Source, source.Options) (schema.ClientMeta, error) { - return &testExecutionClient{}, nil -} - -func bufSourceDialer(context.Context, string) (net.Conn, error) { - testSourceListenerLock.Lock() - defer testSourceListenerLock.Unlock() - return testSourceListener.Dial() -} - -func TestSourceSuccess(t *testing.T) { - plugin := source.NewPlugin( - "testPlugin", - "v1.0.0", - []*schema.Table{testTable("test_table"), testTable("test_table2")}, - newTestExecutionClient) - - cmd := newCmdSourceRoot(&sourceServe{ - plugin: plugin, - }) - cmd.SetArgs([]string{"serve", "--network", "test"}) - ctx := context.Background() - ctx, cancel := context.WithCancel(ctx) - var wg sync.WaitGroup - wg.Add(1) - var serverErr error - go func() { - defer wg.Done() - serverErr = cmd.ExecuteContext(ctx) - }() - defer func() { - cancel() - wg.Wait() - }() - for { - testSourceListenerLock.Lock() - if testSourceListener != nil { - testSourceListenerLock.Unlock() - break - } - testSourceListenerLock.Unlock() - t.Log("waiting for grpc server to start") - time.Sleep(time.Millisecond * 200) - } - - // https://stackoverflow.com/questions/42102496/testing-a-grpc-service - conn, err := grpc.DialContext(ctx, "bufnet", grpc.WithContextDialer(bufSourceDialer), grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithBlock()) - if err != nil { - t.Fatalf("Failed to dial bufnet: %v", err) - } - c := pb.NewSourceClient(conn) - - getNameRes, err := c.GetName(ctx, &pb.GetName_Request{}) - if err != nil { - t.Fatal(err) - } - if getNameRes.Name != "testPlugin" { - t.Fatalf("expected name to be testPlugin but got %s", getNameRes.Name) - } - - getVersionResponse, err := c.GetVersion(ctx, &pb.GetVersion_Request{}) - if err != nil { - t.Fatal(err) - } - if getVersionResponse.Version != "v1.0.0" { - t.Fatalf("Expected version to be v1.0.0 but got %s", getVersionResponse.Version) - } - - spec := specs.Source{ - Name: "testSourcePlugin", - Version: "v1.0.0", - Path: "cloudquery/testSourcePlugin", - Registry: specs.RegistryGithub, - Tables: []string{"test_table"}, - Spec: TestSourcePluginSpec{Accounts: []string{"cloudquery/plugin-sdk"}}, - Destinations: []string{"test"}, - } - specMarshaled, err := json.Marshal(spec) - if err != nil { - t.Fatalf("Failed to marshal spec: %v", err) - } - - getTablesRes, err := c.GetTables(ctx, &pb.GetTables_Request{}) - if err != nil { - t.Fatal(err) - } - - tables, err := schema.NewTablesFromBytes(getTablesRes.Tables) - if err != nil { - t.Fatal(err) - } - - if len(tables) != 2 { - t.Fatalf("Expected 2 tables but got %d", len(tables)) - } - if _, err := c.Init(ctx, &pb.Init_Request{Spec: specMarshaled}); err != nil { - t.Fatal(err) - } - - getTablesForSpecRes, err := c.GetDynamicTables(ctx, &pb.GetDynamicTables_Request{}) - if err != nil { - t.Fatal(err) - } - tables, err = schema.NewTablesFromBytes(getTablesForSpecRes.Tables) - if err != nil { - t.Fatal(err) - } - - if len(tables) != 1 { - t.Fatalf("Expected 1 table but got %d", len(tables)) - } - - syncClient, err := c.Sync(ctx, &pb.Sync_Request{}) - if err != nil { - t.Fatal(err) - } - var resources []arrow.Record - for { - r, err := syncClient.Recv() - if err == io.EOF { - break - } - if err != nil { - t.Fatal(err) - } - rdr, err := ipc.NewReader(bytes.NewReader(r.Resource)) - if err != nil { - t.Fatal(err) - } - for rdr.Next() { - rec := rdr.Record() - rec.Retain() - resources = append(resources, rec) - } - } - - totalResources := 0 - for _, resource := range resources { - sc := resource.Schema() - tableName, ok := sc.Metadata().GetValue(schema.MetadataTableName) - if !ok { - t.Fatal("Expected table name metadata to be set") - } - if tableName != "test_table" { - t.Fatalf("Expected resource with table name test_table. got: %s", tableName) - } - if len(resource.Columns()) != 5 { - t.Fatalf("Expected resource with data length 3 but got %d", len(resource.Columns())) - } - totalResources++ - } - if totalResources != 1 { - t.Fatalf("Expected 1 resource on channel but got %d", totalResources) - } - - getMetricsRes, err := c.GetMetrics(ctx, &pb.GetMetrics_Request{}) - if err != nil { - t.Fatal(err) - } - var stats source.Metrics - if err := json.Unmarshal(getMetricsRes.Metrics, &stats); err != nil { - t.Fatal(err) - } - - clientStats := stats.TableClient[""][""] - if clientStats.Resources != 1 { - t.Fatalf("Expected 1 resource but got %d", clientStats.Resources) - } - - if clientStats.Errors != 0 { - t.Fatalf("Expected 0 errors but got %d", clientStats.Errors) - } - - if clientStats.Panics != 0 { - t.Fatalf("Expected 0 panics but got %d", clientStats.Panics) - } - - cancel() - wg.Wait() - if serverErr != nil { - t.Fatal(serverErr) - } -} diff --git a/transformers/struct.go b/transformers/struct.go index 2296af865e..b6c97842c5 100644 --- a/transformers/struct.go +++ b/transformers/struct.go @@ -8,9 +8,9 @@ import ( "time" "github.com/apache/arrow/go/v13/arrow" - "github.com/cloudquery/plugin-sdk/v3/caser" - "github.com/cloudquery/plugin-sdk/v3/schema" - "github.com/cloudquery/plugin-sdk/v3/types" + "github.com/cloudquery/plugin-sdk/v4/caser" + "github.com/cloudquery/plugin-sdk/v4/schema" + "github.com/cloudquery/plugin-sdk/v4/types" "github.com/thoas/go-funk" "golang.org/x/exp/slices" ) diff --git a/transformers/struct_test.go b/transformers/struct_test.go index 55acfbef16..d59cc6588b 100644 --- a/transformers/struct_test.go +++ b/transformers/struct_test.go @@ -7,8 +7,8 @@ import ( "time" "github.com/apache/arrow/go/v13/arrow" - "github.com/cloudquery/plugin-sdk/v3/schema" - "github.com/cloudquery/plugin-sdk/v3/types" + "github.com/cloudquery/plugin-sdk/v4/schema" + "github.com/cloudquery/plugin-sdk/v4/types" "github.com/google/go-cmp/cmp" "golang.org/x/exp/slices" ) From 85ef1712d3eec4f4491c78ea03776359fe2aed9d Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Tue, 30 May 2023 22:05:10 +0300 Subject: [PATCH 053/125] more wip --- internal/servers/destination/v0/specv3tov1.go | 44 ++--- internal/servers/destination/v1/specv3tov1.go | 44 ++--- plugin/managed_writer.go | 2 +- {internal/memdb => plugin}/memdb.go | 35 ++-- {internal/memdb => plugin}/memdb_test.go | 77 ++++----- plugin/nulls.go | 6 +- plugin/options.go | 3 +- plugin/plugin.go | 10 +- plugin/plugin_managed_source_test.go | 2 +- plugin/plugin_round_robin_test.go | 158 +----------------- plugin/testing_overwrite_deletestale.go | 9 +- plugin/testing_write.go | 6 +- plugin/testing_write_append.go | 2 +- plugin/testing_write_migrate.go | 2 +- plugin/testing_write_overwrite.go | 2 +- serve/plugin.go | 4 +- 16 files changed, 124 insertions(+), 282 deletions(-) rename {internal/memdb => plugin}/memdb.go (84%) rename {internal/memdb => plugin}/memdb_test.go (71%) diff --git a/internal/servers/destination/v0/specv3tov1.go b/internal/servers/destination/v0/specv3tov1.go index 31ab4fb5de..1e7146e507 100644 --- a/internal/servers/destination/v0/specv3tov1.go +++ b/internal/servers/destination/v0/specv3tov1.go @@ -7,22 +7,22 @@ import ( func SourceSpecV1ToV3(spec specs.Source) pbPlugin.Spec { newSpec := pbPlugin.Spec{ - Name: spec.Name, + Name: spec.Name, Version: spec.Version, - Path: spec.Path, + Path: spec.Path, SyncSpec: &pbPlugin.SyncSpec{ - Tables: spec.Tables, - SkipTables: spec.SkipTables, - Destinations: spec.Destinations, - Concurrency: uint64(spec.Concurrency), + Tables: spec.Tables, + SkipTables: spec.SkipTables, + Destinations: spec.Destinations, + Concurrency: uint64(spec.Concurrency), DetrministicCqId: spec.DeterministicCQID, }, } switch spec.Scheduler { case specs.SchedulerDFS: - newSpec.SyncSpec.Scheduler = pbPlugin.SyncSpec_SCHEDULER_DFS + newSpec.SyncSpec.Scheduler = pbPlugin.SyncSpec_SCHEDULER_DFS case specs.SchedulerRoundRobin: - newSpec.SyncSpec.Scheduler = pbPlugin.SyncSpec_SCHEDULER_ROUND_ROBIN + newSpec.SyncSpec.Scheduler = pbPlugin.SyncSpec_SCHEDULER_ROUND_ROBIN default: panic("invalid scheduler " + spec.Scheduler.String()) } @@ -31,47 +31,47 @@ func SourceSpecV1ToV3(spec specs.Source) pbPlugin.Spec { func SpecV1ToV3(spec specs.Destination) pbPlugin.Spec { newSpec := pbPlugin.Spec{ - Name: spec.Name, + Name: spec.Name, Version: spec.Version, - Path: spec.Path, + Path: spec.Path, WriteSpec: &pbPlugin.WriteSpec{ - BatchSize: uint64(spec.BatchSize), + BatchSize: uint64(spec.BatchSize), BatchSizeBytes: uint64(spec.BatchSizeBytes), }, } switch spec.Registry { case specs.RegistryGithub: - newSpec.Registry = pbPlugin.Spec_REGISTRY_GITHUB + newSpec.Registry = pbPlugin.Spec_REGISTRY_GITHUB case specs.RegistryGrpc: - newSpec.Registry = pbPlugin.Spec_REGISTRY_GRPC + newSpec.Registry = pbPlugin.Spec_REGISTRY_GRPC case specs.RegistryLocal: - newSpec.Registry = pbPlugin.Spec_REGISTRY_LOCAL + newSpec.Registry = pbPlugin.Spec_REGISTRY_LOCAL default: panic("invalid registry " + spec.Registry.String()) } switch spec.WriteMode { case specs.WriteModeAppend: - newSpec.WriteSpec.WriteMode = pbPlugin.WRITE_MODE_WRITE_MODE_APPEND + newSpec.WriteSpec.WriteMode = pbPlugin.WRITE_MODE_WRITE_MODE_APPEND case specs.WriteModeOverwrite: - newSpec.WriteSpec.WriteMode = pbPlugin.WRITE_MODE_WRITE_MODE_OVERWRITE + newSpec.WriteSpec.WriteMode = pbPlugin.WRITE_MODE_WRITE_MODE_OVERWRITE case specs.WriteModeOverwriteDeleteStale: - newSpec.WriteSpec.WriteMode = pbPlugin.WRITE_MODE_WRITE_MODE_OVERWRITE_DELETE_STALE + newSpec.WriteSpec.WriteMode = pbPlugin.WRITE_MODE_WRITE_MODE_OVERWRITE_DELETE_STALE default: panic("invalid write mode " + spec.WriteMode.String()) } switch spec.PKMode { case specs.PKModeDefaultKeys: - newSpec.WriteSpec.PkMode = pbPlugin.WriteSpec_DEFAULT + newSpec.WriteSpec.PkMode = pbPlugin.WriteSpec_DEFAULT case specs.PKModeCQID: - newSpec.WriteSpec.PkMode = pbPlugin.WriteSpec_CQ_ID_ONLY + newSpec.WriteSpec.PkMode = pbPlugin.WriteSpec_CQ_ID_ONLY } switch spec.MigrateMode { case specs.MigrateModeSafe: - newSpec.WriteSpec.MigrateMode = pbPlugin.WriteSpec_SAFE + newSpec.WriteSpec.MigrateMode = pbPlugin.WriteSpec_SAFE case specs.MigrateModeForced: - newSpec.WriteSpec.MigrateMode = pbPlugin.WriteSpec_FORCE + newSpec.WriteSpec.MigrateMode = pbPlugin.WriteSpec_FORCE default: panic("invalid migrate mode " + spec.MigrateMode.String()) } return newSpec -} \ No newline at end of file +} diff --git a/internal/servers/destination/v1/specv3tov1.go b/internal/servers/destination/v1/specv3tov1.go index 31ab4fb5de..1e7146e507 100644 --- a/internal/servers/destination/v1/specv3tov1.go +++ b/internal/servers/destination/v1/specv3tov1.go @@ -7,22 +7,22 @@ import ( func SourceSpecV1ToV3(spec specs.Source) pbPlugin.Spec { newSpec := pbPlugin.Spec{ - Name: spec.Name, + Name: spec.Name, Version: spec.Version, - Path: spec.Path, + Path: spec.Path, SyncSpec: &pbPlugin.SyncSpec{ - Tables: spec.Tables, - SkipTables: spec.SkipTables, - Destinations: spec.Destinations, - Concurrency: uint64(spec.Concurrency), + Tables: spec.Tables, + SkipTables: spec.SkipTables, + Destinations: spec.Destinations, + Concurrency: uint64(spec.Concurrency), DetrministicCqId: spec.DeterministicCQID, }, } switch spec.Scheduler { case specs.SchedulerDFS: - newSpec.SyncSpec.Scheduler = pbPlugin.SyncSpec_SCHEDULER_DFS + newSpec.SyncSpec.Scheduler = pbPlugin.SyncSpec_SCHEDULER_DFS case specs.SchedulerRoundRobin: - newSpec.SyncSpec.Scheduler = pbPlugin.SyncSpec_SCHEDULER_ROUND_ROBIN + newSpec.SyncSpec.Scheduler = pbPlugin.SyncSpec_SCHEDULER_ROUND_ROBIN default: panic("invalid scheduler " + spec.Scheduler.String()) } @@ -31,47 +31,47 @@ func SourceSpecV1ToV3(spec specs.Source) pbPlugin.Spec { func SpecV1ToV3(spec specs.Destination) pbPlugin.Spec { newSpec := pbPlugin.Spec{ - Name: spec.Name, + Name: spec.Name, Version: spec.Version, - Path: spec.Path, + Path: spec.Path, WriteSpec: &pbPlugin.WriteSpec{ - BatchSize: uint64(spec.BatchSize), + BatchSize: uint64(spec.BatchSize), BatchSizeBytes: uint64(spec.BatchSizeBytes), }, } switch spec.Registry { case specs.RegistryGithub: - newSpec.Registry = pbPlugin.Spec_REGISTRY_GITHUB + newSpec.Registry = pbPlugin.Spec_REGISTRY_GITHUB case specs.RegistryGrpc: - newSpec.Registry = pbPlugin.Spec_REGISTRY_GRPC + newSpec.Registry = pbPlugin.Spec_REGISTRY_GRPC case specs.RegistryLocal: - newSpec.Registry = pbPlugin.Spec_REGISTRY_LOCAL + newSpec.Registry = pbPlugin.Spec_REGISTRY_LOCAL default: panic("invalid registry " + spec.Registry.String()) } switch spec.WriteMode { case specs.WriteModeAppend: - newSpec.WriteSpec.WriteMode = pbPlugin.WRITE_MODE_WRITE_MODE_APPEND + newSpec.WriteSpec.WriteMode = pbPlugin.WRITE_MODE_WRITE_MODE_APPEND case specs.WriteModeOverwrite: - newSpec.WriteSpec.WriteMode = pbPlugin.WRITE_MODE_WRITE_MODE_OVERWRITE + newSpec.WriteSpec.WriteMode = pbPlugin.WRITE_MODE_WRITE_MODE_OVERWRITE case specs.WriteModeOverwriteDeleteStale: - newSpec.WriteSpec.WriteMode = pbPlugin.WRITE_MODE_WRITE_MODE_OVERWRITE_DELETE_STALE + newSpec.WriteSpec.WriteMode = pbPlugin.WRITE_MODE_WRITE_MODE_OVERWRITE_DELETE_STALE default: panic("invalid write mode " + spec.WriteMode.String()) } switch spec.PKMode { case specs.PKModeDefaultKeys: - newSpec.WriteSpec.PkMode = pbPlugin.WriteSpec_DEFAULT + newSpec.WriteSpec.PkMode = pbPlugin.WriteSpec_DEFAULT case specs.PKModeCQID: - newSpec.WriteSpec.PkMode = pbPlugin.WriteSpec_CQ_ID_ONLY + newSpec.WriteSpec.PkMode = pbPlugin.WriteSpec_CQ_ID_ONLY } switch spec.MigrateMode { case specs.MigrateModeSafe: - newSpec.WriteSpec.MigrateMode = pbPlugin.WriteSpec_SAFE + newSpec.WriteSpec.MigrateMode = pbPlugin.WriteSpec_SAFE case specs.MigrateModeForced: - newSpec.WriteSpec.MigrateMode = pbPlugin.WriteSpec_FORCE + newSpec.WriteSpec.MigrateMode = pbPlugin.WriteSpec_FORCE default: panic("invalid migrate mode " + spec.MigrateMode.String()) } return newSpec -} \ No newline at end of file +} diff --git a/plugin/managed_writer.go b/plugin/managed_writer.go index 74092f785e..3ed8d26903 100644 --- a/plugin/managed_writer.go +++ b/plugin/managed_writer.go @@ -165,4 +165,4 @@ func (p *Plugin) writeManagedTableBatch(ctx context.Context, _ specs.Source, tab } p.workersLock.Unlock() return nil -} \ No newline at end of file +} diff --git a/internal/memdb/memdb.go b/plugin/memdb.go similarity index 84% rename from internal/memdb/memdb.go rename to plugin/memdb.go index c84c32255e..e13da4a9c0 100644 --- a/internal/memdb/memdb.go +++ b/plugin/memdb.go @@ -1,17 +1,14 @@ -package memdb +package plugin import ( "context" "fmt" - "os" "sync" - "testing" "time" "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/array" pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" - "github.com/cloudquery/plugin-sdk/v4/plugin" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/rs/zerolog" ) @@ -26,21 +23,21 @@ type client struct { blockingWrite bool } -type Option func(*client) +type MemDBOption func(*client) -func WithErrOnWrite() Option { +func WithErrOnWrite() MemDBOption { return func(c *client) { c.errOnWrite = true } } -func WithBlockingWrite() Option { +func WithBlockingWrite() MemDBOption { return func(c *client) { c.blockingWrite = true } } -func GetNewClient(options ...Option) plugin.NewClientFunc { +func GetNewClient(options ...MemDBOption) NewClientFunc { c := &client{ memoryDB: make(map[string][]arrow.Record), memoryDBLock: sync.RWMutex{}, @@ -48,20 +45,12 @@ func GetNewClient(options ...Option) plugin.NewClientFunc { for _, opt := range options { opt(c) } - return func(context.Context, zerolog.Logger, pbPlugin.Spec) (plugin.Client, error) { + return func(context.Context, zerolog.Logger, pbPlugin.Spec) (Client, error) { return c, nil } } -func getTestLogger(t *testing.T) zerolog.Logger { - t.Helper() - zerolog.TimeFieldFormat = zerolog.TimeFormatUnixMs - return zerolog.New(zerolog.NewTestWriter(t)).Output( - zerolog.ConsoleWriter{Out: os.Stderr, TimeFormat: time.StampMicro}, - ).Level(zerolog.DebugLevel).With().Timestamp().Logger() -} - -func NewClient(_ context.Context, _ zerolog.Logger, spec pbPlugin.Spec) (plugin.Client, error) { +func NewMemDBClient(_ context.Context, _ zerolog.Logger, spec pbPlugin.Spec) (Client, error) { return &client{ memoryDB: make(map[string][]arrow.Record), tables: make(map[string]*schema.Table), @@ -69,7 +58,7 @@ func NewClient(_ context.Context, _ zerolog.Logger, spec pbPlugin.Spec) (plugin. }, nil } -func NewClientErrOnNew(context.Context, zerolog.Logger, pbPlugin.Spec) (plugin.Client, error) { +func NewMemDBClientErrOnNew(context.Context, zerolog.Logger, pbPlugin.Spec) (Client, error) { return nil, fmt.Errorf("newTestDestinationMemDBClientErrOnNew") } @@ -98,7 +87,7 @@ func (c *client) ID() string { return "testDestinationMemDB" } -func (c *client) Sync(ctx context.Context, metrics *plugin.Metrics, res chan<- arrow.Record) error { +func (c *client) Sync(ctx context.Context, metrics *Metrics, res chan<- arrow.Record) error { c.memoryDBLock.RLock() for tableName := range c.memoryDB { for _, row := range c.memoryDB[tableName] { @@ -209,8 +198,8 @@ func (c *client) WriteTableBatch(ctx context.Context, table *schema.Table, resou return nil } -func (*client) Metrics() plugin.Metrics { - return plugin.Metrics{} +func (*client) Metrics() Metrics { + return Metrics{} } func (c *client) Close(context.Context) error { @@ -239,4 +228,4 @@ func (c *client) deleteStaleTable(_ context.Context, table *schema.Table, source } } c.memoryDB[tableName] = filteredTable -} \ No newline at end of file +} diff --git a/internal/memdb/memdb_test.go b/plugin/memdb_test.go similarity index 71% rename from internal/memdb/memdb_test.go rename to plugin/memdb_test.go index b5196d45f3..6b8b78ab9e 100644 --- a/internal/memdb/memdb_test.go +++ b/plugin/memdb_test.go @@ -1,4 +1,4 @@ -package memdb +package plugin import ( "context" @@ -7,14 +7,13 @@ import ( "github.com/apache/arrow/go/v13/arrow" pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" - "github.com/cloudquery/plugin-sdk/v4/plugin" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/google/uuid" "github.com/rs/zerolog" "github.com/stretchr/testify/require" ) -var migrateStrategyOverwrite = plugin.MigrateStrategy{ +var migrateStrategyOverwrite = MigrateStrategy{ AddColumn: pbPlugin.WriteSpec_FORCE, AddColumnNotNull: pbPlugin.WriteSpec_FORCE, RemoveColumn: pbPlugin.WriteSpec_FORCE, @@ -22,7 +21,7 @@ var migrateStrategyOverwrite = plugin.MigrateStrategy{ ChangeColumn: pbPlugin.WriteSpec_FORCE, } -var migrateStrategyAppend = plugin.MigrateStrategy{ +var migrateStrategyAppend = MigrateStrategy{ AddColumn: pbPlugin.WriteSpec_FORCE, AddColumnNotNull: pbPlugin.WriteSpec_FORCE, RemoveColumn: pbPlugin.WriteSpec_FORCE, @@ -31,13 +30,13 @@ var migrateStrategyAppend = plugin.MigrateStrategy{ } func TestPluginUnmanagedClient(t *testing.T) { - plugin.PluginTestSuiteRunner( + PluginTestSuiteRunner( t, - func() *plugin.Plugin { - return plugin.NewPlugin("test", "development", NewClient) + func() *Plugin { + return NewPlugin("test", "development", NewMemDBClient) }, pbPlugin.Spec{}, - plugin.PluginTestSuiteTests{ + PluginTestSuiteTests{ MigrateStrategyOverwrite: migrateStrategyOverwrite, MigrateStrategyAppend: migrateStrategyAppend, }, @@ -45,55 +44,55 @@ func TestPluginUnmanagedClient(t *testing.T) { } func TestPluginManagedClient(t *testing.T) { - plugin.PluginTestSuiteRunner(t, - func() *plugin.Plugin { - return plugin.NewPlugin("test", "development", NewClient, plugin.WithManagedWriter()) + PluginTestSuiteRunner(t, + func() *Plugin { + return NewPlugin("test", "development", NewMemDBClient, WithManagedWriter()) }, pbPlugin.Spec{}, - plugin.PluginTestSuiteTests{ + PluginTestSuiteTests{ MigrateStrategyOverwrite: migrateStrategyOverwrite, MigrateStrategyAppend: migrateStrategyAppend, }) } func TestPluginManagedClientWithSmallBatchSize(t *testing.T) { - plugin.PluginTestSuiteRunner(t, - func() *plugin.Plugin { - return plugin.NewPlugin("test", "development", NewClient, plugin.WithManagedWriter(), - plugin.WithDefaultBatchSize(1), - plugin.WithDefaultBatchSizeBytes(1)) + PluginTestSuiteRunner(t, + func() *Plugin { + return NewPlugin("test", "development", NewMemDBClient, WithManagedWriter(), + WithDefaultBatchSize(1), + WithDefaultBatchSizeBytes(1)) }, pbPlugin.Spec{}, - plugin.PluginTestSuiteTests{ + PluginTestSuiteTests{ MigrateStrategyOverwrite: migrateStrategyOverwrite, MigrateStrategyAppend: migrateStrategyAppend, }) } func TestPluginManagedClientWithLargeBatchSize(t *testing.T) { - plugin.PluginTestSuiteRunner(t, - func() *plugin.Plugin { - return plugin.NewPlugin("test", "development", NewClient, plugin.WithManagedWriter(), - plugin.WithDefaultBatchSize(100000000), - plugin.WithDefaultBatchSizeBytes(100000000)) + PluginTestSuiteRunner(t, + func() *Plugin { + return NewPlugin("test", "development", NewMemDBClient, WithManagedWriter(), + WithDefaultBatchSize(100000000), + WithDefaultBatchSizeBytes(100000000)) }, pbPlugin.Spec{}, - plugin.PluginTestSuiteTests{ + PluginTestSuiteTests{ MigrateStrategyOverwrite: migrateStrategyOverwrite, MigrateStrategyAppend: migrateStrategyAppend, }) } func TestPluginManagedClientWithCQPKs(t *testing.T) { - plugin.PluginTestSuiteRunner(t, - func() *plugin.Plugin { - return plugin.NewPlugin("test", "development", NewClient) + PluginTestSuiteRunner(t, + func() *Plugin { + return NewPlugin("test", "development", NewMemDBClient) }, pbPlugin.Spec{ WriteSpec: &pbPlugin.WriteSpec{ PkMode: pbPlugin.WriteSpec_CQ_ID_ONLY, }, }, - plugin.PluginTestSuiteTests{ + PluginTestSuiteTests{ MigrateStrategyOverwrite: migrateStrategyOverwrite, MigrateStrategyAppend: migrateStrategyAppend, }) @@ -101,7 +100,7 @@ func TestPluginManagedClientWithCQPKs(t *testing.T) { func TestPluginOnNewError(t *testing.T) { ctx := context.Background() - p := plugin.NewPlugin("test", "development", NewClientErrOnNew) + p := NewPlugin("test", "development", NewMemDBClientErrOnNew) err := p.Init(ctx, pbPlugin.Spec{}) if err == nil { @@ -112,7 +111,7 @@ func TestPluginOnNewError(t *testing.T) { func TestOnWriteError(t *testing.T) { ctx := context.Background() newClientFunc := GetNewClient(WithErrOnWrite()) - p := plugin.NewPlugin("test", "development", newClientFunc) + p := NewPlugin("test", "development", newClientFunc) if err := p.Init(ctx, pbPlugin.Spec{}); err != nil { t.Fatal(err) } @@ -147,7 +146,7 @@ func TestOnWriteError(t *testing.T) { func TestOnWriteCtxCancelled(t *testing.T) { ctx := context.Background() newClientFunc := GetNewClient(WithBlockingWrite()) - p := plugin.NewPlugin("test", "development", newClientFunc) + p := NewPlugin("test", "development", newClientFunc) if err := p.Init(ctx, pbPlugin.Spec{}); err != nil { t.Fatal(err) } @@ -187,19 +186,21 @@ func TestPluginInit(t *testing.T) { batchSizeObserved uint64 batchSizeBytesObserved uint64 ) - p := plugin.NewPlugin( + p := NewPlugin( "test", "development", - func(ctx context.Context, logger zerolog.Logger, s pbPlugin.Spec) (plugin.Client, error) { + func(ctx context.Context, logger zerolog.Logger, s pbPlugin.Spec) (Client, error) { batchSizeObserved = s.WriteSpec.BatchSize batchSizeBytesObserved = s.WriteSpec.BatchSizeBytes - return NewClient(ctx, logger, s) + return NewMemDBClient(ctx, logger, s) }, - plugin.WithDefaultBatchSize(batchSize), - plugin.WithDefaultBatchSizeBytes(batchSizeBytes), + WithDefaultBatchSize(batchSize), + WithDefaultBatchSizeBytes(batchSizeBytes), ) - require.NoError(t, p.Init(context.TODO(), pbPlugin.Spec{})) + require.NoError(t, p.Init(context.TODO(), pbPlugin.Spec{ + WriteSpec: &pbPlugin.WriteSpec{}, + })) require.Equal(t, batchSize, batchSizeObserved) require.Equal(t, batchSizeBytes, batchSizeBytesObserved) -} \ No newline at end of file +} diff --git a/plugin/nulls.go b/plugin/nulls.go index 12ad0facf7..02d80a5f1c 100644 --- a/plugin/nulls.go +++ b/plugin/nulls.go @@ -69,8 +69,4 @@ func (f AllowNullFunc) replaceNullsByEmpty(records []arrow.Record) { } records[i] = array.NewRecord(records[i].Schema(), cols, records[i].NumRows()) } -<<<<<<< HEAD:plugins/destination/nulls.go -} -======= -} ->>>>>>> 5ba1713 (wip):plugin/nulls.go +} \ No newline at end of file diff --git a/plugin/options.go b/plugin/options.go index d3104875e7..2a226724e0 100644 --- a/plugin/options.go +++ b/plugin/options.go @@ -45,7 +45,6 @@ func WithStaticTables(tables schema.Tables) Option { } } - func WithManagedWriter() Option { return func(p *Plugin) { p.managedWriter = true @@ -68,4 +67,4 @@ func WithDefaultBatchSizeBytes(defaultBatchSizeBytes int) Option { return func(p *Plugin) { p.defaultBatchSizeBytes = defaultBatchSizeBytes } -} \ No newline at end of file +} diff --git a/plugin/plugin.go b/plugin/plugin.go index e812cf941c..a4847ca007 100644 --- a/plugin/plugin.go +++ b/plugin/plugin.go @@ -49,6 +49,10 @@ func (UnimplementedWriter) Write(ctx context.Context, tables schema.Tables, res return fmt.Errorf("not implemented") } +func (UnimplementedWriter) WriteTableBatch(ctx context.Context, table *schema.Table, data []arrow.Record) error { + return fmt.Errorf("not implemented") +} + func (UnimplementedWriter) DeleteStale(ctx context.Context, tables schema.Tables, sourceName string, syncTime time.Time) error { return fmt.Errorf("not implemented") } @@ -111,8 +115,8 @@ type Plugin struct { syncTime time.Time managedWriter bool - workers map[string]*worker - workersLock *sync.Mutex + workers map[string]*worker + workersLock *sync.Mutex batchTimeout time.Duration defaultBatchSize int @@ -310,7 +314,7 @@ func (p *Plugin) Init(ctx context.Context, spec pbPlugin.Spec) error { if p.maxDepth > maxAllowedDepth { return fmt.Errorf("max depth of tables is %d, max allowed is %d", p.maxDepth, maxAllowedDepth) } - } else { + } else if tables != nil { tables, err = tables.FilterDfs(spec.SyncSpec.Tables, spec.SyncSpec.SkipTables, true) if err != nil { return fmt.Errorf("failed to filter tables: %w", err) diff --git a/plugin/plugin_managed_source_test.go b/plugin/plugin_managed_source_test.go index 159c7dd9c8..cdaf02e616 100644 --- a/plugin/plugin_managed_source_test.go +++ b/plugin/plugin_managed_source_test.go @@ -8,7 +8,7 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/array" - pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v0" + pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" "github.com/cloudquery/plugin-sdk/v4/scalar" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/google/go-cmp/cmp" diff --git a/plugin/plugin_round_robin_test.go b/plugin/plugin_round_robin_test.go index 64b6472387..e24c15d108 100644 --- a/plugin/plugin_round_robin_test.go +++ b/plugin/plugin_round_robin_test.go @@ -2,169 +2,17 @@ package plugin import ( "context" - "fmt" - "sync" "testing" "time" - "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/array" - pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v0" + pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" "github.com/cloudquery/plugin-sdk/v4/schema" - "github.com/rs/zerolog" ) -type testPluginClient struct { - memoryDB map[string][]arrow.Record - tables map[string]*schema.Table - spec pbPlugin.Spec - memoryDBLock sync.RWMutex -} - -type testPluginSpec struct { - ConnectionString string `json:"connection_string"` -} - -func (c *testPluginClient) ID() string { - return "test-plugin" -} - -func (c *testPluginClient) Sync(ctx context.Context, metrics *Metrics, res chan<- arrow.Record) error { - c.memoryDBLock.RLock() - for tableName := range c.memoryDB { - for _, row := range c.memoryDB[tableName] { - res <- row - } - } - c.memoryDBLock.RUnlock() - return nil -} - -func (c *testPluginClient) Migrate(ctx context.Context, tables schema.Tables) error { - for _, table := range tables { - tableName := table.Name - memTable := c.memoryDB[tableName] - if memTable == nil { - c.memoryDB[tableName] = make([]arrow.Record, 0) - c.tables[tableName] = table - continue - } - - changes := table.GetChanges(c.tables[tableName]) - // memdb doesn't support any auto-migrate - if changes == nil { - continue - } - c.memoryDB[tableName] = make([]arrow.Record, 0) - c.tables[tableName] = table - } - return nil -} - -func (c *testPluginClient) Write(ctx context.Context, tables schema.Tables, resources <-chan arrow.Record) error { - for resource := range resources { - c.memoryDBLock.Lock() - sc := resource.Schema() - tableName, ok := sc.Metadata().GetValue(schema.MetadataTableName) - if !ok { - return fmt.Errorf("table name not found in schema metadata") - } - table := c.tables[tableName] - if c.spec.WriteSpec.WriteMode == pbPlugin.WRITE_MODE_WRITE_MODE_APPEND { - c.memoryDB[tableName] = append(c.memoryDB[tableName], resource) - } else { - c.overwrite(table, resource) - } - c.memoryDBLock.Unlock() - } - return nil -} - -func (c *testPluginClient) overwrite(table *schema.Table, data arrow.Record) { - pksIndex := table.PrimaryKeysIndexes() - tableName := table.Name - for i, row := range c.memoryDB[tableName] { - found := true - for _, pkIndex := range pksIndex { - s1 := data.Column(pkIndex).String() - s2 := row.Column(pkIndex).String() - if s1 != s2 { - found = false - } - } - if found { - c.memoryDB[tableName] = append(c.memoryDB[tableName][:i], c.memoryDB[tableName][i+1:]...) - c.memoryDB[tableName] = append(c.memoryDB[tableName], data) - return - } - } - c.memoryDB[tableName] = append(c.memoryDB[tableName], data) -} - -func (c *testPluginClient) deleteStaleTable(_ context.Context, table *schema.Table, source string, syncTime time.Time) { - sourceColIndex := table.Columns.Index(schema.CqSourceNameColumn.Name) - syncColIndex := table.Columns.Index(schema.CqSyncTimeColumn.Name) - tableName := table.Name - var filteredTable []arrow.Record - for i, row := range c.memoryDB[tableName] { - if row.Column(sourceColIndex).(*array.String).Value(0) == source { - rowSyncTime := row.Column(syncColIndex).(*array.Timestamp).Value(0).ToTime(arrow.Microsecond).UTC() - if !rowSyncTime.Before(syncTime) { - filteredTable = append(filteredTable, c.memoryDB[tableName][i]) - } - } - } - c.memoryDB[tableName] = filteredTable -} - -func (c *testPluginClient) DeleteStale(ctx context.Context, tables schema.Tables, sourceName string, syncTime time.Time) error { - for _, table := range tables { - c.deleteStaleTable(ctx, table, sourceName, syncTime) - } - return nil -} - -func (c *testPluginClient) Close(ctx context.Context) error { - c.memoryDB = nil - return nil -} - -func (c *testPluginClient) Read(ctx context.Context, table *schema.Table, sourceName string, res chan<- arrow.Record) error { - tableName := table.Name - if c.memoryDB[tableName] == nil { - return nil - } - sourceColIndex := table.Columns.Index(schema.CqSourceNameColumn.Name) - if sourceColIndex == -1 { - return fmt.Errorf("table %s doesn't have source column", tableName) - } - var sortedRes []arrow.Record - c.memoryDBLock.RLock() - for _, row := range c.memoryDB[tableName] { - arr := row.Column(sourceColIndex) - if arr.(*array.String).Value(0) == sourceName { - sortedRes = append(sortedRes, row) - } - } - c.memoryDBLock.RUnlock() - - for _, row := range sortedRes { - res <- row - } - return nil -} - -func NewTestPluginClient(ctx context.Context, logger zerolog.Logger, spec pbPlugin.Spec) (Client, error) { - return &testPluginClient{ - memoryDB: make(map[string][]arrow.Record), - tables: make(map[string]*schema.Table), - spec: spec, - }, nil -} - func TestPluginRoundRobin(t *testing.T) { ctx := context.Background() - p := NewPlugin("test", "v0.0.0", NewTestPluginClient, WithUnmanaged()) + p := NewPlugin("test", "v0.0.0", NewMemDBClient, WithUnmanaged()) testTable := schema.TestTable("test_table", schema.TestSourceOptions{}) syncTime := time.Now().UTC() testRecords := schema.GenTestData(testTable, schema.GenTestDataOptions{ @@ -227,4 +75,4 @@ func TestPluginRoundRobin(t *testing.T) { if err := p.Close(ctx); err != nil { t.Fatal(err) } -} \ No newline at end of file +} diff --git a/plugin/testing_overwrite_deletestale.go b/plugin/testing_overwrite_deletestale.go index 788decd8a4..3b2266d080 100644 --- a/plugin/testing_overwrite_deletestale.go +++ b/plugin/testing_overwrite_deletestale.go @@ -33,8 +33,11 @@ func (*PluginTestSuite) destinationPluginTestWriteOverwriteDeleteStale(ctx conte sourceName := "testOverwriteSource" + uuid.NewString() sourceSpec := pbPlugin.Spec{ - Name: sourceName, - // Backend: specs.BackendLocal, + Name: sourceName, + BackendSpec: &pbPlugin.Spec{ + Name: "local", + Path: "cloudquery/local", + }, } opts := schema.GenTestDataOptions{ @@ -150,4 +153,4 @@ func (*PluginTestSuite) destinationPluginTestWriteOverwriteDeleteStale(ctx conte } return nil -} \ No newline at end of file +} diff --git a/plugin/testing_write.go b/plugin/testing_write.go index 17fc3f6100..8f2b3da285 100644 --- a/plugin/testing_write.go +++ b/plugin/testing_write.go @@ -170,7 +170,9 @@ func WithTestSourceSkipDecimals() func(o *PluginTestSuiteRunnerOptions) { func PluginTestSuiteRunner(t *testing.T, newPlugin NewPluginFunc, destSpec pbPlugin.Spec, tests PluginTestSuiteTests, testOptions ...func(o *PluginTestSuiteRunnerOptions)) { t.Helper() destSpec.Name = "testsuite" - + if destSpec.WriteSpec == nil { + destSpec.WriteSpec = &pbPlugin.WriteSpec{} + } suite := &PluginTestSuite{ tests: tests, } @@ -291,4 +293,4 @@ func sortRecordsBySyncTime(table *schema.Table, records []arrow.Record) { } return first.Before(second) }) -} \ No newline at end of file +} diff --git a/plugin/testing_write_append.go b/plugin/testing_write_append.go index d56d20287e..4720431062 100644 --- a/plugin/testing_write_append.go +++ b/plugin/testing_write_append.go @@ -93,4 +93,4 @@ func (s *PluginTestSuite) destinationPluginTestWriteAppend(ctx context.Context, } return nil -} \ No newline at end of file +} diff --git a/plugin/testing_write_migrate.go b/plugin/testing_write_migrate.go index d0c8b54ea8..9ac2021866 100644 --- a/plugin/testing_write_migrate.go +++ b/plugin/testing_write_migrate.go @@ -281,4 +281,4 @@ func (*PluginTestSuite) destinationPluginTestMigrate( require.NoError(t, p.Init(ctx, nonForced)) require.NoError(t, p.Migrate(ctx, schema.Tables{table})) }) -} \ No newline at end of file +} diff --git a/plugin/testing_write_overwrite.go b/plugin/testing_write_overwrite.go index a7dba53037..12c8400053 100644 --- a/plugin/testing_write_overwrite.go +++ b/plugin/testing_write_overwrite.go @@ -109,4 +109,4 @@ func (*PluginTestSuite) destinationPluginTestWriteOverwrite(ctx context.Context, } return nil -} \ No newline at end of file +} diff --git a/serve/plugin.go b/serve/plugin.go index 2fe9774262..0b0e1de290 100644 --- a/serve/plugin.go +++ b/serve/plugin.go @@ -33,9 +33,9 @@ import ( ) type pluginServe struct { - plugin *plugin.Plugin + plugin *plugin.Plugin destinationV0V1Server bool - sentryDSN string + sentryDSN string } type PluginOption func(*pluginServe) From d3da36649c667969cc5a522e248eadc21d7ecb78 Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Wed, 31 May 2023 15:41:33 +0300 Subject: [PATCH 054/125] wip --- internal/servers/plugin/v3/plugin.go | 14 +++-- plugin/docs.go | 4 +- plugin/{docs_test.go.backup => docs_test.go} | 7 ++- plugin/memdb_test.go | 16 +++-- plugin/plugin.go | 44 ++++++++++--- ..._test.go => destination_v0_test.go.backup} | 0 serve/destination_v1_test.go | 32 +++------- serve/plugin.go | 63 ++++++++++++------- serve/plugin_test.go | 35 ++--------- 9 files changed, 117 insertions(+), 98 deletions(-) rename plugin/{docs_test.go.backup => docs_test.go} (92%) rename serve/{destination_v0_test.go => destination_v0_test.go.backup} (100%) diff --git a/internal/servers/plugin/v3/plugin.go b/internal/servers/plugin/v3/plugin.go index 8a117bee9f..29a58f1fce 100644 --- a/internal/servers/plugin/v3/plugin.go +++ b/internal/servers/plugin/v3/plugin.go @@ -45,9 +45,11 @@ func (s *Server) GetStaticTables(context.Context, *pb.GetStaticTables_Request) ( } func (s *Server) GetDynamicTables(context.Context, *pb.GetDynamicTables_Request) (*pb.GetDynamicTables_Response, error) { - // TODO: Fix this - tables := s.Plugin.StaticTables().ToArrowSchemas() - encoded, err := tables.Encode() + tables := s.Plugin.DynamicTables() + if tables == nil { + return &pb.GetDynamicTables_Response{}, nil + } + encoded, err := tables.ToArrowSchemas().Encode() if err != nil { return nil, fmt.Errorf("failed to encode tables: %w", err) } @@ -81,6 +83,10 @@ func (s *Server) Sync(req *pb.Sync_Request, stream pb.Plugin_SyncServer) error { var syncErr error ctx := stream.Context() + if req.SyncSpec == nil { + req.SyncSpec = &pb.SyncSpec{} + } + go func() { defer close(records) err := s.Plugin.Sync(ctx, req.SyncTime.AsTime(), *req.SyncSpec, records) @@ -229,7 +235,7 @@ func (s *Server) Write(msg pb.Plugin_WriteServer) error { func (s *Server) GenDocs(req *pb.GenDocs_Request, srv pb.Plugin_GenDocsServer) error { tmpDir := os.TempDir() defer os.RemoveAll(tmpDir) - err := s.Plugin.GeneratePluginDocs(s.Plugin.StaticTables(), tmpDir, req.Format) + err := s.Plugin.GeneratePluginDocs(tmpDir, req.Format) if err != nil { return fmt.Errorf("failed to generate docs: %w", err) } diff --git a/plugin/docs.go b/plugin/docs.go index e66bf7ebb2..b100ea649c 100644 --- a/plugin/docs.go +++ b/plugin/docs.go @@ -79,11 +79,11 @@ type templateData struct { } // GeneratePluginDocs creates table documentation for the source plugin based on its list of tables -func (p *Plugin) GeneratePluginDocs(tables schema.Tables, dir string, format pbPlugin.GenDocs_FORMAT) error { +func (p *Plugin) GeneratePluginDocs(dir string, format pbPlugin.GenDocs_FORMAT) error { if err := os.MkdirAll(dir, os.ModePerm); err != nil { return err } - + tables := p.staticTables setDestinationManagedCqColumns(tables) sortedTables := make(schema.Tables, 0, len(tables)) diff --git a/plugin/docs_test.go.backup b/plugin/docs_test.go similarity index 92% rename from plugin/docs_test.go.backup rename to plugin/docs_test.go index 06f271f9fd..878e006e88 100644 --- a/plugin/docs_test.go.backup +++ b/plugin/docs_test.go @@ -9,6 +9,7 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/bradleyjkemp/cupaloy/v2" + pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/cloudquery/plugin-sdk/v4/types" "github.com/stretchr/testify/require" @@ -120,14 +121,14 @@ var testTables = []*schema.Table{ } func TestGeneratePluginDocs(t *testing.T) { - p := NewPlugin("test", "v1.0.0", testTables, newTestExecutionClient) + p := NewPlugin("test", "v1.0.0", newTestExecutionClient, WithStaticTables(testTables)) cup := cupaloy.New(cupaloy.SnapshotSubdirectory("testdata")) t.Run("Markdown", func(t *testing.T) { tmpdir := t.TempDir() - err := p.GeneratePluginDocs(tmpdir, "markdown") + err := p.GeneratePluginDocs(tmpdir, pbPlugin.GenDocs_FORMAT_MARKDOWN) if err != nil { t.Fatalf("unexpected error calling GeneratePluginDocs: %v", err) } @@ -146,7 +147,7 @@ func TestGeneratePluginDocs(t *testing.T) { t.Run("JSON", func(t *testing.T) { tmpdir := t.TempDir() - err := p.GeneratePluginDocs(tmpdir, "json") + err := p.GeneratePluginDocs(tmpdir, pbPlugin.GenDocs_FORMAT_JSON) if err != nil { t.Fatalf("unexpected error calling GeneratePluginDocs: %v", err) } diff --git a/plugin/memdb_test.go b/plugin/memdb_test.go index 6b8b78ab9e..4cfa954f1f 100644 --- a/plugin/memdb_test.go +++ b/plugin/memdb_test.go @@ -112,7 +112,9 @@ func TestOnWriteError(t *testing.T) { ctx := context.Background() newClientFunc := GetNewClient(WithErrOnWrite()) p := NewPlugin("test", "development", newClientFunc) - if err := p.Init(ctx, pbPlugin.Spec{}); err != nil { + if err := p.Init(ctx, pbPlugin.Spec{ + WriteSpec: &pbPlugin.WriteSpec{}, + }); err != nil { t.Fatal(err) } table := schema.TestTable("test", schema.TestSourceOptions{}) @@ -147,7 +149,9 @@ func TestOnWriteCtxCancelled(t *testing.T) { ctx := context.Background() newClientFunc := GetNewClient(WithBlockingWrite()) p := NewPlugin("test", "development", newClientFunc) - if err := p.Init(ctx, pbPlugin.Spec{}); err != nil { + if err := p.Init(ctx, pbPlugin.Spec{ + WriteSpec: &pbPlugin.WriteSpec{}, + }); err != nil { t.Fatal(err) } table := schema.TestTable("test", schema.TestSourceOptions{}) @@ -178,8 +182,8 @@ func TestOnWriteCtxCancelled(t *testing.T) { func TestPluginInit(t *testing.T) { const ( - batchSize = 100 - batchSizeBytes = 1000 + batchSize = uint64(100) + batchSizeBytes = uint64(1000) ) var ( @@ -194,8 +198,8 @@ func TestPluginInit(t *testing.T) { batchSizeBytesObserved = s.WriteSpec.BatchSizeBytes return NewMemDBClient(ctx, logger, s) }, - WithDefaultBatchSize(batchSize), - WithDefaultBatchSizeBytes(batchSizeBytes), + WithDefaultBatchSize(int(batchSize)), + WithDefaultBatchSizeBytes(int(batchSizeBytes)), ) require.NoError(t, p.Init(context.TODO(), pbPlugin.Spec{ WriteSpec: &pbPlugin.WriteSpec{}, diff --git a/plugin/plugin.go b/plugin/plugin.go index a4847ca007..dc254f0827 100644 --- a/plugin/plugin.go +++ b/plugin/plugin.go @@ -20,6 +20,12 @@ import ( pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" ) +const ( + defaultBatchTimeoutSeconds = 20 + defaultBatchSize = 10000 + defaultBatchSizeBytes = 5 * 1024 * 1024 // 5 MiB +) + type Options struct { Backend backend.Backend } @@ -193,13 +199,17 @@ func maxDepth(tables schema.Tables) uint64 { func NewPlugin(name string, version string, newClient NewClientFunc, options ...Option) *Plugin { p := Plugin{ - name: name, - version: version, - internalColumns: true, - caser: caser.New(), - titleTransformer: DefaultTitleTransformer, - newClient: newClient, - metrics: &Metrics{TableClient: make(map[string]map[string]*TableClientMetrics)}, + name: name, + version: version, + internalColumns: true, + caser: caser.New(), + titleTransformer: DefaultTitleTransformer, + newClient: newClient, + metrics: &Metrics{TableClient: make(map[string]map[string]*TableClientMetrics)}, + workersLock: &sync.Mutex{}, + batchTimeout: time.Duration(defaultBatchTimeoutSeconds) * time.Second, + defaultBatchSize: defaultBatchSize, + defaultBatchSizeBytes: defaultBatchSizeBytes, } for _, opt := range options { opt(&p) @@ -272,12 +282,30 @@ func (p *Plugin) Metrics() *Metrics { return p.metrics } +func (p *Plugin) setSpecDefaults(spec *pbPlugin.Spec) { + if spec.WriteSpec == nil { + spec.WriteSpec = &pbPlugin.WriteSpec{ + BatchSize: uint64(p.defaultBatchSize), + BatchSizeBytes: uint64(p.defaultBatchSizeBytes), + } + } + if spec.WriteSpec.BatchSize == 0 { + spec.WriteSpec.BatchSize = uint64(p.defaultBatchSize) + } + if spec.WriteSpec.BatchSizeBytes == 0 { + spec.WriteSpec.BatchSizeBytes = uint64(p.defaultBatchSizeBytes) + } + if spec.SyncSpec == nil { + spec.SyncSpec = &pbPlugin.SyncSpec{} + } +} + func (p *Plugin) Init(ctx context.Context, spec pbPlugin.Spec) error { if !p.mu.TryLock() { return fmt.Errorf("plugin already in use") } defer p.mu.Unlock() - + p.setSpecDefaults(&spec) var err error p.client, err = p.newClient(ctx, p.logger, spec) if err != nil { diff --git a/serve/destination_v0_test.go b/serve/destination_v0_test.go.backup similarity index 100% rename from serve/destination_v0_test.go rename to serve/destination_v0_test.go.backup diff --git a/serve/destination_v1_test.go b/serve/destination_v1_test.go index 0f55b90694..577fd8f9ff 100644 --- a/serve/destination_v1_test.go +++ b/serve/destination_v1_test.go @@ -13,9 +13,7 @@ import ( "github.com/apache/arrow/go/v13/arrow/ipc" pb "github.com/cloudquery/plugin-pb-go/pb/destination/v1" "github.com/cloudquery/plugin-pb-go/specs" - "github.com/cloudquery/plugin-sdk/v4/internal/memdb" "github.com/cloudquery/plugin-sdk/v4/plugin" - "github.com/cloudquery/plugin-sdk/v4/plugins/destination" "github.com/cloudquery/plugin-sdk/v4/schema" "google.golang.org/grpc" "google.golang.org/grpc/credentials/insecure" @@ -23,12 +21,8 @@ import ( ) func TestDestinationV1(t *testing.T) { - plugin := plugin.NewPlugin("testDestinationPlugin", "development", memdb.NewClient) - s := &pluginServe{ - plugin: plugin, - } - cmd := newCmdPluginRoot(s) - cmd.SetArgs([]string{"serve", "--network", "test"}) + p := plugin.NewPlugin("testDestinationPlugin", "development", plugin.NewMemDBClient) + srv := Plugin(p, WithArgs("serve"), WithDestinationV0V1Server(), WithTestListener()) ctx := context.Background() ctx, cancel := context.WithCancel(ctx) var wg sync.WaitGroup @@ -36,27 +30,15 @@ func TestDestinationV1(t *testing.T) { var serverErr error go func() { defer wg.Done() - serverErr = cmd.ExecuteContext(ctx) + serverErr = srv.Serve(ctx) }() defer func() { cancel() wg.Wait() }() - // wait for the server to start - for { - testPluginListenerLock.Lock() - if testPluginListener != nil { - testPluginListenerLock.Unlock() - break - } - testPluginListenerLock.Unlock() - t.Log("waiting for grpc server to start") - time.Sleep(time.Millisecond * 200) - } - // https://stackoverflow.com/questions/42102496/testing-a-grpc-service - conn, err := grpc.DialContext(ctx, "bufnet", grpc.WithContextDialer(bufDestinationDialer), grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithBlock()) + conn, err := grpc.DialContext(ctx, "bufnet", grpc.WithContextDialer(srv.bufPluginDialer), grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithBlock()) if err != nil { t.Fatalf("Failed to dial bufnet: %v", err) } @@ -148,7 +130,7 @@ func TestDestinationV1(t *testing.T) { } // serversDestination readCh := make(chan arrow.Record, 1) - if err := plugin.Read(ctx, table, sourceName, readCh); err != nil { + if err := p.Read(ctx, table, sourceName, readCh); err != nil { t.Fatal(err) } close(readCh) @@ -156,8 +138,8 @@ func TestDestinationV1(t *testing.T) { for resource := range readCh { totalResources++ if !array.RecordEqual(rec, resource) { - diff := destination.RecordDiff(rec, resource) - t.Fatalf("expected %v but got %v. Diff: %v", rec, resource, diff) + diff := plugin.RecordDiff(rec, resource) + t.Fatalf("diff at %d: %s", totalResources, diff) } } if totalResources != 1 { diff --git a/serve/plugin.go b/serve/plugin.go index 0b0e1de290..0adddb4b9c 100644 --- a/serve/plugin.go +++ b/serve/plugin.go @@ -1,15 +1,16 @@ package serve import ( + "context" "fmt" "net" "os" "os/signal" "strings" - "sync" "syscall" "github.com/cloudquery/plugin-sdk/v4/plugin" + "github.com/cloudquery/plugin-sdk/v4/types" pbDestinationV0 "github.com/cloudquery/plugin-pb-go/pb/destination/v0" pbDestinationV1 "github.com/cloudquery/plugin-pb-go/pb/destination/v1" @@ -34,8 +35,11 @@ import ( type pluginServe struct { plugin *plugin.Plugin + args []string destinationV0V1Server bool sentryDSN string + testListener bool + testListenerConn *bufconn.Listener } type PluginOption func(*pluginServe) @@ -54,28 +58,48 @@ func WithDestinationV0V1Server() PluginOption { } } -// lis used for unit testing grpc server and client -var testPluginListener *bufconn.Listener -var testPluginListenerLock sync.Mutex +// WithArgs used to serve the plugin with predefined args instead of os.Args +func WithArgs(args ...string) PluginOption { + return func(s *pluginServe) { + s.args = args + } +} + +// WithTestListener means that the plugin will be served with an in-memory listener +// available via testListener() method instead of a network listener. +func WithTestListener() PluginOption { + return func(s *pluginServe) { + s.testListener = true + s.testListenerConn = bufconn.Listen(testBufSize) + } +} const servePluginShort = `Start plugin server` -func Plugin(plugin *plugin.Plugin, opts ...PluginOption) { +func Plugin(plugin *plugin.Plugin, opts ...PluginOption) *pluginServe{ s := &pluginServe{ plugin: plugin, } for _, opt := range opts { opt(s) } - if err := newCmdPluginRoot(s).Execute(); err != nil { - sentry.CaptureMessage(err.Error()) - fmt.Println(err) - os.Exit(1) + return s +} + +func (s *pluginServe) bufPluginDialer(context.Context, string) (net.Conn, error) { + return s.testListenerConn.Dial() +} + +func (s *pluginServe) Serve(ctx context.Context) error { + types.RegisterAllExtensions() + cmd := s.newCmdPluginRoot() + if s.args != nil { + cmd.SetArgs(s.args) } + return cmd.ExecuteContext(ctx) } -// nolint:dupl -func newCmdPluginServe(serve *pluginServe) *cobra.Command { +func (serve *pluginServe) newCmdPluginServe() *cobra.Command { var address string var network string var noSentry bool @@ -107,11 +131,8 @@ func newCmdPluginServe(serve *pluginServe) *cobra.Command { // opts.Plugin.Logger = logger var listener net.Listener - if network == "test" { - testPluginListenerLock.Lock() - listener = bufconn.Listen(testBufSize) - testPluginListener = listener.(*bufconn.Listener) - testPluginListenerLock.Unlock() + if serve.testListener { + listener = serve.testListenerConn } else { listener, err = net.Listen(network, address) if err != nil { @@ -230,7 +251,7 @@ doc --format json . ` ) -func newCmdPluginDoc(serve *pluginServe) *cobra.Command { +func (serve *pluginServe) newCmdPluginDoc() *cobra.Command { format := newEnum([]string{"json", "markdown"}, "markdown") cmd := &cobra.Command{ Use: "doc ", @@ -239,19 +260,19 @@ func newCmdPluginDoc(serve *pluginServe) *cobra.Command { Args: cobra.ExactArgs(1), RunE: func(cmd *cobra.Command, args []string) error { pbFormat := pbv3.GenDocs_FORMAT(pbv3.GenDocs_FORMAT_value[format.Value]) - return serve.plugin.GeneratePluginDocs(serve.plugin.StaticTables(), args[0], pbFormat) + return serve.plugin.GeneratePluginDocs(args[0], pbFormat) }, } cmd.Flags().Var(format, "format", fmt.Sprintf("output format. one of: %s", strings.Join(format.Allowed, ","))) return cmd } -func newCmdPluginRoot(serve *pluginServe) *cobra.Command { +func (serve *pluginServe) newCmdPluginRoot() *cobra.Command { cmd := &cobra.Command{ Use: fmt.Sprintf("%s ", serve.plugin.Name()), } - cmd.AddCommand(newCmdPluginServe(serve)) - cmd.AddCommand(newCmdPluginDoc(serve)) + cmd.AddCommand(serve.newCmdPluginServe()) + cmd.AddCommand(serve.newCmdPluginDoc()) cmd.CompletionOptions.DisableDefaultCmd = true cmd.Version = serve.plugin.Version() return cmd diff --git a/serve/plugin_test.go b/serve/plugin_test.go index 9aeb864a4c..16f719abc8 100644 --- a/serve/plugin_test.go +++ b/serve/plugin_test.go @@ -5,16 +5,13 @@ import ( "context" "encoding/json" "io" - "net" "sync" "testing" - "time" "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/ipc" - pb "github.com/cloudquery/plugin-pb-go/pb/plugin/v0" + pb "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" "github.com/cloudquery/plugin-sdk/v4/plugin" - "github.com/cloudquery/plugin-sdk/v4/plugins/source" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/rs/zerolog" "google.golang.org/grpc" @@ -65,23 +62,13 @@ func newTestExecutionClient(context.Context, zerolog.Logger, pb.Spec) (plugin.Cl return &testExecutionClient{}, nil } -func bufSourceDialer(context.Context, string) (net.Conn, error) { - testPluginListenerLock.Lock() - defer testPluginListenerLock.Unlock() - return testPluginListener.Dial() -} - func TestSourceSuccess(t *testing.T) { - plugin := plugin.NewPlugin( + p := plugin.NewPlugin( "testPlugin", "v1.0.0", newTestExecutionClient, plugin.WithStaticTables([]*schema.Table{testTable("test_table"), testTable("test_table2")})) - - cmd := newCmdPluginRoot(&pluginServe{ - plugin: plugin, - }) - cmd.SetArgs([]string{"serve", "--network", "test"}) + srv := Plugin(p, WithArgs("serve"), WithTestListener()) ctx := context.Background() ctx, cancel := context.WithCancel(ctx) var wg sync.WaitGroup @@ -89,25 +76,15 @@ func TestSourceSuccess(t *testing.T) { var serverErr error go func() { defer wg.Done() - serverErr = cmd.ExecuteContext(ctx) + serverErr = srv.Serve(ctx) }() defer func() { cancel() wg.Wait() }() - for { - testPluginListenerLock.Lock() - if testPluginListener != nil { - testPluginListenerLock.Unlock() - break - } - testPluginListenerLock.Unlock() - t.Log("waiting for grpc server to start") - time.Sleep(time.Millisecond * 200) - } // https://stackoverflow.com/questions/42102496/testing-a-grpc-service - conn, err := grpc.DialContext(ctx, "bufnet", grpc.WithContextDialer(bufSourceDialer), grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithBlock()) + conn, err := grpc.DialContext(ctx, "bufnet", grpc.WithContextDialer(srv.bufPluginDialer), grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithBlock()) if err != nil { t.Fatalf("Failed to dial bufnet: %v", err) } @@ -216,7 +193,7 @@ func TestSourceSuccess(t *testing.T) { if err != nil { t.Fatal(err) } - var stats source.Metrics + var stats plugin.Metrics if err := json.Unmarshal(getMetricsRes.Metrics, &stats); err != nil { t.Fatal(err) } From 2fbd64d792ce159cec677ccfc16fac478293b492 Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Wed, 31 May 2023 15:58:23 +0300 Subject: [PATCH 055/125] more wip --- ..._test.go.backup => destination_v0_test.go} | 34 +++---------------- 1 file changed, 5 insertions(+), 29 deletions(-) rename serve/{destination_v0_test.go.backup => destination_v0_test.go} (82%) diff --git a/serve/destination_v0_test.go.backup b/serve/destination_v0_test.go similarity index 82% rename from serve/destination_v0_test.go.backup rename to serve/destination_v0_test.go index e59bcbfd88..6a206e53ad 100644 --- a/serve/destination_v0_test.go.backup +++ b/serve/destination_v0_test.go @@ -3,7 +3,6 @@ package serve import ( "context" "encoding/json" - "net" "sync" "testing" "time" @@ -17,7 +16,6 @@ import ( schemav2 "github.com/cloudquery/plugin-sdk/v2/schema" "github.com/cloudquery/plugin-sdk/v2/testdata" "github.com/cloudquery/plugin-sdk/v4/internal/deprecated" - "github.com/cloudquery/plugin-sdk/v4/internal/memdb" serversDestination "github.com/cloudquery/plugin-sdk/v4/internal/servers/destination/v0" "github.com/cloudquery/plugin-sdk/v4/plugin" "google.golang.org/grpc" @@ -25,19 +23,9 @@ import ( "google.golang.org/protobuf/types/known/timestamppb" ) -func bufDestinationDialer(context.Context, string) (net.Conn, error) { - testPluginListenerLock.Lock() - defer testPluginListenerLock.Unlock() - return testPluginListener.Dial() -} - func TestDestination(t *testing.T) { - plugin := plugin.NewPlugin("testDestinationPlugin", "development", memdb.NewClient) - s := &pluginServe{ - plugin: plugin, - } - cmd := newCmdPluginRoot(s) - cmd.SetArgs([]string{"serve", "--network", "test"}) + p := plugin.NewPlugin("testDestinationPlugin", "development", plugin.NewMemDBClient) + srv := Plugin(p, WithArgs("serve"), WithDestinationV0V1Server(), WithTestListener()) ctx := context.Background() ctx, cancel := context.WithCancel(ctx) var wg sync.WaitGroup @@ -45,27 +33,15 @@ func TestDestination(t *testing.T) { var serverErr error go func() { defer wg.Done() - serverErr = cmd.ExecuteContext(ctx) + serverErr = srv.Serve(ctx) }() defer func() { cancel() wg.Wait() }() - // wait for the server to start - for { - testPluginListenerLock.Lock() - if testPluginListener != nil { - testPluginListenerLock.Unlock() - break - } - testPluginListenerLock.Unlock() - t.Log("waiting for grpc server to start") - time.Sleep(time.Millisecond * 200) - } - // https://stackoverflow.com/questions/42102496/testing-a-grpc-service - conn, err := grpc.DialContext(ctx, "bufnet", grpc.WithContextDialer(bufDestinationDialer), grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithBlock()) + conn, err := grpc.DialContext(ctx, "bufnet", grpc.WithContextDialer(srv.bufPluginDialer), grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithBlock()) if err != nil { t.Fatalf("Failed to dial bufnet: %v", err) } @@ -154,7 +130,7 @@ func TestDestination(t *testing.T) { // serversDestination table := serversDestination.TableV2ToV3(tableV2) readCh := make(chan arrow.Record, 1) - if err := plugin.Read(ctx, table, sourceName, readCh); err != nil { + if err := p.Read(ctx, table, sourceName, readCh); err != nil { t.Fatal(err) } close(readCh) From 3e76a4a44fab626e96d187f1bc0863fe0bc4aabc Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Fri, 2 Jun 2023 00:27:41 +0300 Subject: [PATCH 056/125] more wip --- helpers/integers.go | 19 -- helpers/pointers.go | 20 -- helpers/pointers_test.go | 50 ---- helpers/strings.go | 39 --- helpers/strings_test.go | 20 -- internal/backends/local/local.go | 2 +- .../servers/destination/v0/destinations.go | 42 ++- internal/servers/destination/v0/specv3tov1.go | 77 ------ .../servers/destination/v1/destinations.go | 34 ++- internal/servers/destination/v1/specv3tov1.go | 77 ------ internal/servers/plugin/v3/plugin.go | 68 +++-- plugin/managed_writer.go | 27 +- plugin/memdb.go | 21 +- plugin/memdb_test.go | 31 --- plugin/options.go | 66 ++++- plugin/plugin.go | 255 ++---------------- plugin/plugin_managed_source_test.go | 41 +-- plugin/plugin_reader.go | 110 ++++++++ plugin/plugin_round_robin_test.go | 78 ------ plugin/plugin_test.go | 97 +++++++ plugin/plugin_writer.go | 69 +++++ plugin/scheduler_dfs.go | 7 +- plugin/scheduler_round_robin.go | 5 +- plugin/testing_overwrite_deletestale.go | 20 +- plugin/testing_sync.go | 45 ++-- plugin/testing_write.go | 56 ++-- plugin/testing_write_append.go | 17 +- plugin/testing_write_migrate.go | 53 ++-- plugin/testing_write_overwrite.go | 15 +- serve/plugin.go | 84 +++--- serve/plugin_test.go | 16 +- 31 files changed, 652 insertions(+), 909 deletions(-) delete mode 100644 helpers/integers.go delete mode 100644 helpers/pointers.go delete mode 100644 helpers/pointers_test.go delete mode 100644 helpers/strings.go delete mode 100644 helpers/strings_test.go delete mode 100644 internal/servers/destination/v0/specv3tov1.go delete mode 100644 internal/servers/destination/v1/specv3tov1.go create mode 100644 plugin/plugin_reader.go delete mode 100644 plugin/plugin_round_robin_test.go create mode 100644 plugin/plugin_test.go create mode 100644 plugin/plugin_writer.go diff --git a/helpers/integers.go b/helpers/integers.go deleted file mode 100644 index a539552377..0000000000 --- a/helpers/integers.go +++ /dev/null @@ -1,19 +0,0 @@ -package helpers - -import "math" - -// Uint64ToInt64 if value is greater than math.MaxInt64 return math.MaxInt64 -// otherwise returns original value cast to int64 -func Uint64ToInt64(i uint64) int64 { - if i > math.MaxInt64 { - return math.MaxInt64 - } - return int64(i) -} - -func Uint64ToInt(i uint64) int { - if i > math.MaxInt { - return math.MaxInt - } - return int(i) -} diff --git a/helpers/pointers.go b/helpers/pointers.go deleted file mode 100644 index 2f5a008535..0000000000 --- a/helpers/pointers.go +++ /dev/null @@ -1,20 +0,0 @@ -package helpers - -import "reflect" - -// ToPointer takes an any object and will return a pointer to this object -// if the object is not already a pointer. Otherwise, it will return the original value. -// It is safe to typecast the return-value of GetPointer into a pointer of the right type, -// except in very special cases (such as passing in nil without an explicit type) -func ToPointer(v any) any { - val := reflect.ValueOf(v) - if val.Kind() == reflect.Ptr { - return v - } - if !val.IsValid() { - return v - } - p := reflect.New(val.Type()) - p.Elem().Set(val) - return p.Interface() -} diff --git a/helpers/pointers_test.go b/helpers/pointers_test.go deleted file mode 100644 index 2ae81ed7a3..0000000000 --- a/helpers/pointers_test.go +++ /dev/null @@ -1,50 +0,0 @@ -package helpers - -import ( - "testing" -) - -type testStruct struct { - test string -} - -func TestToPointer(t *testing.T) { - // passing string should return pointer to string - give := "test" - got := ToPointer(give) - if _, ok := got.(*string); !ok { - t.Errorf("ToPointer(%q) returned %q, expected type *string", give, got) - } - - // passing struct by value should return pointer to (copy of the) struct - giveObj := testStruct{ - test: "value", - } - gotStruct := ToPointer(giveObj) - if _, ok := gotStruct.(*testStruct); !ok { - t.Errorf("ToPointer(%q) returned %q, expected type *testStruct", giveObj, gotStruct) - } - - // passing pointer should return the original pointer - ptr := &giveObj - gotPointer := ToPointer(ptr) - if gotPointer != ptr { - t.Errorf("ToPointer(%q) returned %q, expected %q", ptr, gotPointer, ptr) - } - - // passing nil should return nil back without panicking - gotNil := ToPointer(nil) - if gotNil != nil { - t.Errorf("ToPointer(%v) returned %q, expected nil", nil, gotNil) - } - - // passing number should return pointer to number - giveNumber := int64(0) - gotNumber := ToPointer(giveNumber) - if v, ok := gotNumber.(*int64); !ok { - t.Errorf("ToPointer(%q) returned %q, expected type *int64", giveNumber, gotNumber) - if *v != 0 { - t.Errorf("ToPointer(%q) returned %q, expected 0", giveNumber, gotNumber) - } - } -} diff --git a/helpers/strings.go b/helpers/strings.go deleted file mode 100644 index e522a3c5ea..0000000000 --- a/helpers/strings.go +++ /dev/null @@ -1,39 +0,0 @@ -package helpers - -import ( - "fmt" - "sort" - "strings" - - "github.com/spf13/cast" -) - -func FormatSlice(a []string) string { - // sort slice for consistency - sort.Strings(a) - q := make([]string, len(a)) - for i, s := range a { - q[i] = fmt.Sprintf("%q", s) - } - return fmt.Sprintf("[\n\t%s\n]", strings.Join(q, ",\n\t")) -} - -func HasDuplicates(resources []string) bool { - dups := make(map[string]bool, len(resources)) - for _, r := range resources { - if _, ok := dups[r]; ok { - return true - } - dups[r] = true - } - return false -} - -func ToStringSliceE(i any) ([]string, error) { - switch v := i.(type) { - case *[]string: - return cast.ToStringSliceE(*v) - default: - return cast.ToStringSliceE(i) - } -} diff --git a/helpers/strings_test.go b/helpers/strings_test.go deleted file mode 100644 index 991492df8e..0000000000 --- a/helpers/strings_test.go +++ /dev/null @@ -1,20 +0,0 @@ -package helpers - -import ( - "testing" - - "github.com/stretchr/testify/assert" -) - -func TestHasDuplicates(t *testing.T) { - assert.False(t, HasDuplicates([]string{"A", "b", "c"})) - assert.False(t, HasDuplicates([]string{"A", "a", "c"})) - assert.True(t, HasDuplicates([]string{"a", "a", "c"})) - assert.True(t, HasDuplicates([]string{"a", "a", "c", "c", "f"})) -} - -func TestToStingSliceE(t *testing.T) { - arr := &[]string{"a", "b", "c"} - newArr, _ := ToStringSliceE(arr) - assert.Equal(t, newArr, []string{"a", "b", "c"}) -} diff --git a/internal/backends/local/local.go b/internal/backends/local/local.go index 0593d8b0b0..f593260dde 100644 --- a/internal/backends/local/local.go +++ b/internal/backends/local/local.go @@ -10,7 +10,7 @@ import ( "strings" "sync" - "github.com/cloudquery/plugin-pb-go/specs" + "github.com/cloudquery/plugin-pb-go/specs/v0" ) type Local struct { diff --git a/internal/servers/destination/v0/destinations.go b/internal/servers/destination/v0/destinations.go index c315bd0652..93fe380574 100644 --- a/internal/servers/destination/v0/destinations.go +++ b/internal/servers/destination/v0/destinations.go @@ -10,7 +10,7 @@ import ( "github.com/apache/arrow/go/v13/arrow/memory" pbBase "github.com/cloudquery/plugin-pb-go/pb/base/v0" pb "github.com/cloudquery/plugin-pb-go/pb/destination/v0" - "github.com/cloudquery/plugin-pb-go/specs" + "github.com/cloudquery/plugin-pb-go/specs/v0" schemav2 "github.com/cloudquery/plugin-sdk/v2/schema" "github.com/cloudquery/plugin-sdk/v4/plugin" "github.com/cloudquery/plugin-sdk/v4/schema" @@ -22,9 +22,11 @@ import ( type Server struct { pb.UnimplementedDestinationServer - Plugin *plugin.Plugin - Logger zerolog.Logger - spec specs.Destination + Plugin *plugin.Plugin + Logger zerolog.Logger + spec specs.Destination + writeMode plugin.WriteMode + migrateMode plugin.MigrateMode } func (*Server) GetProtocolVersion(context.Context, *pbBase.GetProtocolVersion_Request) (*pbBase.GetProtocolVersion_Response, error) { @@ -39,8 +41,21 @@ func (s *Server) Configure(ctx context.Context, req *pbBase.Configure_Request) ( return nil, status.Errorf(codes.InvalidArgument, "failed to unmarshal spec: %v", err) } s.spec = spec - specV3 := SpecV1ToV3(spec) - return &pbBase.Configure_Response{}, s.Plugin.Init(ctx, specV3) + switch s.spec.WriteMode { + case specs.WriteModeAppend: + s.writeMode = plugin.WriteModeAppend + case specs.WriteModeOverwrite: + s.writeMode = plugin.WriteModeOverwrite + case specs.WriteModeOverwriteDeleteStale: + s.writeMode = plugin.WriteModeOverwriteDeleteStale + } + switch s.spec.MigrateMode { + case specs.MigrateModeSafe: + s.migrateMode = plugin.MigrateModeSafe + case specs.MigrateModeForced: + s.migrateMode = plugin.MigrateModeForced + } + return &pbBase.Configure_Response{}, s.Plugin.Init(ctx, nil) } func (s *Server) GetName(context.Context, *pbBase.GetName_Request) (*pbBase.GetName_Response, error) { @@ -64,7 +79,16 @@ func (s *Server) Migrate(ctx context.Context, req *pb.Migrate_Request) (*pb.Migr SetDestinationManagedCqColumns(tables) s.setPKsForTables(tables) - return &pb.Migrate_Response{}, s.Plugin.Migrate(ctx, tables) + var migrateMode plugin.MigrateMode + switch s.spec.MigrateMode { + case specs.MigrateModeSafe: + migrateMode = plugin.MigrateModeSafe + case specs.MigrateModeForced: + migrateMode = plugin.MigrateModeForced + default: + return nil, status.Errorf(codes.InvalidArgument, "invalid migrate mode: %v", s.spec.MigrateMode) + } + return &pb.Migrate_Response{}, s.Plugin.Migrate(ctx, tables, migrateMode) } func (*Server) Write(pb.Destination_WriteServer) error { @@ -103,9 +127,9 @@ func (s *Server) Write2(msg pb.Destination_Write2Server) error { SetDestinationManagedCqColumns(tables) s.setPKsForTables(tables) eg, ctx := errgroup.WithContext(msg.Context()) - sourceSpecV3 := SourceSpecV1ToV3(sourceSpec) + sourceName := r.Source eg.Go(func() error { - return s.Plugin.Write(ctx, sourceSpecV3, tables, syncTime, resources) + return s.Plugin.Write(ctx, sourceName, tables, syncTime, s.writeMode, resources) }) sourceColumn := &schemav2.Text{} _ = sourceColumn.Set(sourceSpec.Name) diff --git a/internal/servers/destination/v0/specv3tov1.go b/internal/servers/destination/v0/specv3tov1.go deleted file mode 100644 index 1e7146e507..0000000000 --- a/internal/servers/destination/v0/specv3tov1.go +++ /dev/null @@ -1,77 +0,0 @@ -package destination - -import ( - pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" - "github.com/cloudquery/plugin-pb-go/specs" -) - -func SourceSpecV1ToV3(spec specs.Source) pbPlugin.Spec { - newSpec := pbPlugin.Spec{ - Name: spec.Name, - Version: spec.Version, - Path: spec.Path, - SyncSpec: &pbPlugin.SyncSpec{ - Tables: spec.Tables, - SkipTables: spec.SkipTables, - Destinations: spec.Destinations, - Concurrency: uint64(spec.Concurrency), - DetrministicCqId: spec.DeterministicCQID, - }, - } - switch spec.Scheduler { - case specs.SchedulerDFS: - newSpec.SyncSpec.Scheduler = pbPlugin.SyncSpec_SCHEDULER_DFS - case specs.SchedulerRoundRobin: - newSpec.SyncSpec.Scheduler = pbPlugin.SyncSpec_SCHEDULER_ROUND_ROBIN - default: - panic("invalid scheduler " + spec.Scheduler.String()) - } - return newSpec -} - -func SpecV1ToV3(spec specs.Destination) pbPlugin.Spec { - newSpec := pbPlugin.Spec{ - Name: spec.Name, - Version: spec.Version, - Path: spec.Path, - WriteSpec: &pbPlugin.WriteSpec{ - BatchSize: uint64(spec.BatchSize), - BatchSizeBytes: uint64(spec.BatchSizeBytes), - }, - } - switch spec.Registry { - case specs.RegistryGithub: - newSpec.Registry = pbPlugin.Spec_REGISTRY_GITHUB - case specs.RegistryGrpc: - newSpec.Registry = pbPlugin.Spec_REGISTRY_GRPC - case specs.RegistryLocal: - newSpec.Registry = pbPlugin.Spec_REGISTRY_LOCAL - default: - panic("invalid registry " + spec.Registry.String()) - } - switch spec.WriteMode { - case specs.WriteModeAppend: - newSpec.WriteSpec.WriteMode = pbPlugin.WRITE_MODE_WRITE_MODE_APPEND - case specs.WriteModeOverwrite: - newSpec.WriteSpec.WriteMode = pbPlugin.WRITE_MODE_WRITE_MODE_OVERWRITE - case specs.WriteModeOverwriteDeleteStale: - newSpec.WriteSpec.WriteMode = pbPlugin.WRITE_MODE_WRITE_MODE_OVERWRITE_DELETE_STALE - default: - panic("invalid write mode " + spec.WriteMode.String()) - } - switch spec.PKMode { - case specs.PKModeDefaultKeys: - newSpec.WriteSpec.PkMode = pbPlugin.WriteSpec_DEFAULT - case specs.PKModeCQID: - newSpec.WriteSpec.PkMode = pbPlugin.WriteSpec_CQ_ID_ONLY - } - switch spec.MigrateMode { - case specs.MigrateModeSafe: - newSpec.WriteSpec.MigrateMode = pbPlugin.WriteSpec_SAFE - case specs.MigrateModeForced: - newSpec.WriteSpec.MigrateMode = pbPlugin.WriteSpec_FORCE - default: - panic("invalid migrate mode " + spec.MigrateMode.String()) - } - return newSpec -} diff --git a/internal/servers/destination/v1/destinations.go b/internal/servers/destination/v1/destinations.go index 4748c3c947..c65c6b4256 100644 --- a/internal/servers/destination/v1/destinations.go +++ b/internal/servers/destination/v1/destinations.go @@ -10,7 +10,7 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/ipc" pb "github.com/cloudquery/plugin-pb-go/pb/destination/v1" - "github.com/cloudquery/plugin-pb-go/specs" + "github.com/cloudquery/plugin-pb-go/specs/v0" "github.com/cloudquery/plugin-sdk/v4/plugin" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/rs/zerolog" @@ -21,9 +21,11 @@ import ( type Server struct { pb.UnimplementedDestinationServer - Plugin *plugin.Plugin - Logger zerolog.Logger - spec specs.Destination + Plugin *plugin.Plugin + Logger zerolog.Logger + spec specs.Destination + writeMode plugin.WriteMode + migrateMode plugin.MigrateMode } func (s *Server) Configure(ctx context.Context, req *pb.Configure_Request) (*pb.Configure_Response, error) { @@ -32,8 +34,21 @@ func (s *Server) Configure(ctx context.Context, req *pb.Configure_Request) (*pb. return nil, status.Errorf(codes.InvalidArgument, "failed to unmarshal spec: %v", err) } s.spec = spec - specV3 := SpecV1ToV3(spec) - return &pb.Configure_Response{}, s.Plugin.Init(ctx, specV3) + switch s.spec.WriteMode { + case specs.WriteModeAppend: + s.writeMode = plugin.WriteModeAppend + case specs.WriteModeOverwrite: + s.writeMode = plugin.WriteModeOverwrite + case specs.WriteModeOverwriteDeleteStale: + s.writeMode = plugin.WriteModeOverwriteDeleteStale + } + switch s.spec.MigrateMode { + case specs.MigrateModeSafe: + s.migrateMode = plugin.MigrateModeSafe + case specs.MigrateModeForced: + s.migrateMode = plugin.MigrateModeForced + } + return &pb.Configure_Response{}, s.Plugin.Init(ctx, s.spec.Spec) } func (s *Server) GetName(context.Context, *pb.GetName_Request) (*pb.GetName_Response, error) { @@ -59,7 +74,7 @@ func (s *Server) Migrate(ctx context.Context, req *pb.Migrate_Request) (*pb.Migr } s.setPKsForTables(tables) - return &pb.Migrate_Response{}, s.Plugin.Migrate(ctx, tables) + return &pb.Migrate_Response{}, s.Plugin.Migrate(ctx, tables, s.migrateMode) } // Note the order of operations in this method is important! @@ -97,9 +112,10 @@ func (s *Server) Write(msg pb.Destination_WriteServer) error { syncTime := r.Timestamp.AsTime() s.setPKsForTables(tables) eg, ctx := errgroup.WithContext(msg.Context()) - sourceSpecV3 := SourceSpecV1ToV3(sourceSpec) + sourceName := r.Source + eg.Go(func() error { - return s.Plugin.Write(ctx, sourceSpecV3, tables, syncTime, resources) + return s.Plugin.Write(ctx, sourceName, tables, syncTime, s.writeMode, resources) }) for { diff --git a/internal/servers/destination/v1/specv3tov1.go b/internal/servers/destination/v1/specv3tov1.go deleted file mode 100644 index 1e7146e507..0000000000 --- a/internal/servers/destination/v1/specv3tov1.go +++ /dev/null @@ -1,77 +0,0 @@ -package destination - -import ( - pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" - "github.com/cloudquery/plugin-pb-go/specs" -) - -func SourceSpecV1ToV3(spec specs.Source) pbPlugin.Spec { - newSpec := pbPlugin.Spec{ - Name: spec.Name, - Version: spec.Version, - Path: spec.Path, - SyncSpec: &pbPlugin.SyncSpec{ - Tables: spec.Tables, - SkipTables: spec.SkipTables, - Destinations: spec.Destinations, - Concurrency: uint64(spec.Concurrency), - DetrministicCqId: spec.DeterministicCQID, - }, - } - switch spec.Scheduler { - case specs.SchedulerDFS: - newSpec.SyncSpec.Scheduler = pbPlugin.SyncSpec_SCHEDULER_DFS - case specs.SchedulerRoundRobin: - newSpec.SyncSpec.Scheduler = pbPlugin.SyncSpec_SCHEDULER_ROUND_ROBIN - default: - panic("invalid scheduler " + spec.Scheduler.String()) - } - return newSpec -} - -func SpecV1ToV3(spec specs.Destination) pbPlugin.Spec { - newSpec := pbPlugin.Spec{ - Name: spec.Name, - Version: spec.Version, - Path: spec.Path, - WriteSpec: &pbPlugin.WriteSpec{ - BatchSize: uint64(spec.BatchSize), - BatchSizeBytes: uint64(spec.BatchSizeBytes), - }, - } - switch spec.Registry { - case specs.RegistryGithub: - newSpec.Registry = pbPlugin.Spec_REGISTRY_GITHUB - case specs.RegistryGrpc: - newSpec.Registry = pbPlugin.Spec_REGISTRY_GRPC - case specs.RegistryLocal: - newSpec.Registry = pbPlugin.Spec_REGISTRY_LOCAL - default: - panic("invalid registry " + spec.Registry.String()) - } - switch spec.WriteMode { - case specs.WriteModeAppend: - newSpec.WriteSpec.WriteMode = pbPlugin.WRITE_MODE_WRITE_MODE_APPEND - case specs.WriteModeOverwrite: - newSpec.WriteSpec.WriteMode = pbPlugin.WRITE_MODE_WRITE_MODE_OVERWRITE - case specs.WriteModeOverwriteDeleteStale: - newSpec.WriteSpec.WriteMode = pbPlugin.WRITE_MODE_WRITE_MODE_OVERWRITE_DELETE_STALE - default: - panic("invalid write mode " + spec.WriteMode.String()) - } - switch spec.PKMode { - case specs.PKModeDefaultKeys: - newSpec.WriteSpec.PkMode = pbPlugin.WriteSpec_DEFAULT - case specs.PKModeCQID: - newSpec.WriteSpec.PkMode = pbPlugin.WriteSpec_CQ_ID_ONLY - } - switch spec.MigrateMode { - case specs.MigrateModeSafe: - newSpec.WriteSpec.MigrateMode = pbPlugin.WriteSpec_SAFE - case specs.MigrateModeForced: - newSpec.WriteSpec.MigrateMode = pbPlugin.WriteSpec_FORCE - default: - panic("invalid migrate mode " + spec.MigrateMode.String()) - } - return newSpec -} diff --git a/internal/servers/plugin/v3/plugin.go b/internal/servers/plugin/v3/plugin.go index 29a58f1fce..bbeb968859 100644 --- a/internal/servers/plugin/v3/plugin.go +++ b/internal/servers/plugin/v3/plugin.go @@ -7,7 +7,6 @@ import ( "errors" "fmt" "io" - "io/ioutil" "os" "path/filepath" @@ -30,7 +29,6 @@ type Server struct { pb.UnimplementedPluginServer Plugin *plugin.Plugin Logger zerolog.Logger - spec pb.Spec } func (s *Server) GetStaticTables(context.Context, *pb.GetStaticTables_Request) (*pb.GetStaticTables_Response, error) { @@ -71,10 +69,9 @@ func (s *Server) GetVersion(context.Context, *pb.GetVersion_Request) (*pb.GetVer } func (s *Server) Init(ctx context.Context, req *pb.Init_Request) (*pb.Init_Response, error) { - if err := s.Plugin.Init(ctx, *req.Spec); err != nil { + if err := s.Plugin.Init(ctx, req.Spec); err != nil { return nil, status.Errorf(codes.Internal, "failed to init plugin: %v", err) } - s.spec = *req.Spec return &pb.Init_Response{}, nil } @@ -83,20 +80,27 @@ func (s *Server) Sync(req *pb.Sync_Request, stream pb.Plugin_SyncServer) error { var syncErr error ctx := stream.Context() - if req.SyncSpec == nil { - req.SyncSpec = &pb.SyncSpec{} + syncOptions := plugin.SyncOptions{ + Tables: req.Tables, + SkipTables: req.SkipTables, + Concurrency: req.Concurrency, + Scheduler: plugin.SchedulerDFS, } + if req.Scheduler == pb.SCHEDULER_SCHEDULER_ROUND_ROBIN { + syncOptions.Scheduler = plugin.SchedulerRoundRobin + } + + sourceName := req.SourceName go func() { defer close(records) - err := s.Plugin.Sync(ctx, req.SyncTime.AsTime(), *req.SyncSpec, records) + err := s.Plugin.Sync(ctx, sourceName, req.SyncTime.AsTime(), syncOptions, records) if err != nil { syncErr = fmt.Errorf("failed to sync records: %w", err) } }() for rec := range records { - var buf bytes.Buffer w := ipc.NewWriter(&buf, ipc.WithSchema(rec.Schema())) if err := w.Write(rec); err != nil { @@ -158,8 +162,18 @@ func (s *Server) Migrate(ctx context.Context, req *pb.Migrate_Request) (*pb.Migr if err != nil { return nil, status.Errorf(codes.InvalidArgument, "failed to create tables: %v", err) } - s.setPKsForTables(tables) - return &pb.Migrate_Response{}, s.Plugin.Migrate(ctx, tables) + if req.PkMode == pb.PK_MODE_CQ_ID_ONLY { + setCQIDAsPrimaryKeysForTables(tables) + } + migrateMode := plugin.MigrateModeSafe + switch req.MigrateMode { + case pb.MIGRATE_MODE_SAFE: + migrateMode = plugin.MigrateModeSafe + case pb.MIGRATE_MODE_FORCE: + migrateMode = plugin.MigrateModeForced + } + // switch req. + return &pb.Migrate_Response{}, s.Plugin.Migrate(ctx, tables, migrateMode) } func (s *Server) Write(msg pb.Plugin_WriteServer) error { @@ -181,12 +195,23 @@ func (s *Server) Write(msg pb.Plugin_WriteServer) error { if err != nil { return status.Errorf(codes.InvalidArgument, "failed to create tables: %v", err) } - s.setPKsForTables(tables) - sourceSpec := *r.SourceSpec - syncTime := r.Timestamp.AsTime() + if r.PkMode == pb.PK_MODE_CQ_ID_ONLY { + setCQIDAsPrimaryKeysForTables(tables) + } + sourceName := r.SourceName + syncTime := r.SyncTime.AsTime() + writeMode := plugin.WriteModeOverwrite + switch r.WriteMode { + case pb.WRITE_MODE_WRITE_MODE_APPEND: + writeMode = plugin.WriteModeAppend + case pb.WRITE_MODE_WRITE_MODE_OVERWRITE: + writeMode = plugin.WriteModeOverwrite + case pb.WRITE_MODE_WRITE_MODE_OVERWRITE_DELETE_STALE: + writeMode = plugin.WriteModeOverwriteDeleteStale + } eg, ctx := errgroup.WithContext(msg.Context()) eg.Go(func() error { - return s.Plugin.Write(ctx, sourceSpec, tables, syncTime, resources) + return s.Plugin.Write(ctx, sourceName, tables, syncTime, writeMode, resources) }) for { @@ -233,15 +258,18 @@ func (s *Server) Write(msg pb.Plugin_WriteServer) error { } func (s *Server) GenDocs(req *pb.GenDocs_Request, srv pb.Plugin_GenDocsServer) error { - tmpDir := os.TempDir() + tmpDir, err := os.MkdirTemp("", "cloudquery-docs") + if err != nil { + return fmt.Errorf("failed to create tmp dir: %w", err) + } defer os.RemoveAll(tmpDir) - err := s.Plugin.GeneratePluginDocs(tmpDir, req.Format) + err = s.Plugin.GeneratePluginDocs(tmpDir, req.Format) if err != nil { return fmt.Errorf("failed to generate docs: %w", err) } // list files in tmpDir - files, err := ioutil.ReadDir(tmpDir) + files, err := os.ReadDir(tmpDir) if err != nil { return fmt.Errorf("failed to read tmp dir: %w", err) } @@ -281,12 +309,6 @@ func checkMessageSize(msg proto.Message, record arrow.Record) error { return nil } -func (s *Server) setPKsForTables(tables schema.Tables) { - if s.spec.WriteSpec.PkMode == pb.WriteSpec_CQ_ID_ONLY { - setCQIDAsPrimaryKeysForTables(tables) - } -} - func setCQIDAsPrimaryKeysForTables(tables schema.Tables) { for _, table := range tables { for i, col := range table.Columns { diff --git a/plugin/managed_writer.go b/plugin/managed_writer.go index 3ed8d26903..6af7fd0004 100644 --- a/plugin/managed_writer.go +++ b/plugin/managed_writer.go @@ -8,7 +8,6 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/util" - "github.com/cloudquery/plugin-pb-go/specs" "github.com/cloudquery/plugin-sdk/v4/internal/pk" "github.com/cloudquery/plugin-sdk/v4/schema" ) @@ -20,7 +19,7 @@ type worker struct { flush chan chan bool } -func (p *Plugin) worker(ctx context.Context, metrics *Metrics, table *schema.Table, ch <-chan arrow.Record, flush <-chan chan bool) { +func (p *Plugin) worker(ctx context.Context, metrics *Metrics, table *schema.Table, writeMode WriteMode, ch <-chan arrow.Record, flush <-chan chan bool) { sizeBytes := int64(0) resources := make([]arrow.Record, 0) for { @@ -28,27 +27,27 @@ func (p *Plugin) worker(ctx context.Context, metrics *Metrics, table *schema.Tab case r, ok := <-ch: if !ok { if len(resources) > 0 { - p.flush(ctx, metrics, table, resources) + p.flush(ctx, metrics, table, writeMode, resources) } return } - if uint64(len(resources)) == p.spec.WriteSpec.BatchSize || sizeBytes+util.TotalRecordSize(r) > int64(p.spec.WriteSpec.BatchSizeBytes) { - p.flush(ctx, metrics, table, resources) - resources = resources[:0] // allows for mem reuse + if uint64(len(resources)) == 1000 || sizeBytes+util.TotalRecordSize(r) > int64(1000) { + p.flush(ctx, metrics, table, writeMode, resources) + resources = make([]arrow.Record, 0) sizeBytes = 0 } resources = append(resources, r) sizeBytes += util.TotalRecordSize(r) case <-time.After(p.batchTimeout): if len(resources) > 0 { - p.flush(ctx, metrics, table, resources) - resources = resources[:0] // allows for mem reuse + p.flush(ctx, metrics, table, writeMode, resources) + resources = make([]arrow.Record, 0) sizeBytes = 0 } case done := <-flush: if len(resources) > 0 { - p.flush(ctx, metrics, table, resources) - resources = resources[:0] // allows for mem reuse + p.flush(ctx, metrics, table, writeMode, resources) + resources = make([]arrow.Record, 0) sizeBytes = 0 } done <- true @@ -59,11 +58,11 @@ func (p *Plugin) worker(ctx context.Context, metrics *Metrics, table *schema.Tab } } -func (p *Plugin) flush(ctx context.Context, metrics *Metrics, table *schema.Table, resources []arrow.Record) { +func (p *Plugin) flush(ctx context.Context, metrics *Metrics, table *schema.Table, writeMode WriteMode, resources []arrow.Record) { resources = p.removeDuplicatesByPK(table, resources) start := time.Now() batchSize := len(resources) - if err := p.client.WriteTableBatch(ctx, table, resources); err != nil { + if err := p.client.WriteTableBatch(ctx, table, writeMode, resources); err != nil { p.logger.Err(err).Str("table", table.Name).Int("len", batchSize).Dur("duration", time.Since(start)).Msg("failed to write batch") // we don't return an error as we need to continue until channel is closed otherwise there will be a deadlock // atomic.AddUint64(&metrics.Errors, uint64(batchSize)) @@ -100,7 +99,7 @@ func (*Plugin) removeDuplicatesByPK(table *schema.Table, resources []arrow.Recor return res } -func (p *Plugin) writeManagedTableBatch(ctx context.Context, _ specs.Source, tables schema.Tables, _ time.Time, res <-chan arrow.Record) error { +func (p *Plugin) writeManagedTableBatch(ctx context.Context, tables schema.Tables, writeMode WriteMode, res <-chan arrow.Record) error { workers := make(map[string]*worker, len(tables)) metrics := &Metrics{} @@ -120,7 +119,7 @@ func (p *Plugin) writeManagedTableBatch(ctx context.Context, _ specs.Source, tab wg.Add(1) go func() { defer wg.Done() - p.worker(ctx, metrics, table, ch, flush) + p.worker(ctx, metrics, table, writeMode, ch, flush) }() } else { p.workers[table.Name].count++ diff --git a/plugin/memdb.go b/plugin/memdb.go index e13da4a9c0..4a99073935 100644 --- a/plugin/memdb.go +++ b/plugin/memdb.go @@ -8,14 +8,12 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/array" - pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/rs/zerolog" ) // client is mostly used for testing the destination plugin. type client struct { - spec pbPlugin.Spec memoryDB map[string][]arrow.Record tables map[string]*schema.Table memoryDBLock sync.RWMutex @@ -45,20 +43,19 @@ func GetNewClient(options ...MemDBOption) NewClientFunc { for _, opt := range options { opt(c) } - return func(context.Context, zerolog.Logger, pbPlugin.Spec) (Client, error) { + return func(context.Context, zerolog.Logger, any) (Client, error) { return c, nil } } -func NewMemDBClient(_ context.Context, _ zerolog.Logger, spec pbPlugin.Spec) (Client, error) { +func NewMemDBClient(_ context.Context, _ zerolog.Logger, spec any) (Client, error) { return &client{ memoryDB: make(map[string][]arrow.Record), tables: make(map[string]*schema.Table), - spec: spec, }, nil } -func NewMemDBClientErrOnNew(context.Context, zerolog.Logger, pbPlugin.Spec) (Client, error) { +func NewMemDBClientErrOnNew(context.Context, zerolog.Logger, []byte) (Client, error) { return nil, fmt.Errorf("newTestDestinationMemDBClientErrOnNew") } @@ -87,7 +84,7 @@ func (c *client) ID() string { return "testDestinationMemDB" } -func (c *client) Sync(ctx context.Context, metrics *Metrics, res chan<- arrow.Record) error { +func (c *client) Sync(ctx context.Context, res chan<- arrow.Record) error { c.memoryDBLock.RLock() for tableName := range c.memoryDB { for _, row := range c.memoryDB[tableName] { @@ -98,7 +95,7 @@ func (c *client) Sync(ctx context.Context, metrics *Metrics, res chan<- arrow.Re return nil } -func (c *client) Migrate(_ context.Context, tables schema.Tables) error { +func (c *client) Migrate(_ context.Context, tables schema.Tables, migrateMode MigrateMode) error { for _, table := range tables { tableName := table.Name memTable := c.memoryDB[tableName] @@ -144,7 +141,7 @@ func (c *client) Read(_ context.Context, table *schema.Table, source string, res return nil } -func (c *client) Write(ctx context.Context, _ schema.Tables, resources <-chan arrow.Record) error { +func (c *client) Write(ctx context.Context, _ schema.Tables, writeMode WriteMode, resources <-chan arrow.Record) error { if c.errOnWrite { return fmt.Errorf("errOnWrite") } @@ -164,7 +161,7 @@ func (c *client) Write(ctx context.Context, _ schema.Tables, resources <-chan ar return fmt.Errorf("table name not found in schema metadata") } table := c.tables[tableName] - if c.spec.WriteSpec.WriteMode == pbPlugin.WRITE_MODE_WRITE_MODE_APPEND { + if writeMode == WriteModeAppend { c.memoryDB[tableName] = append(c.memoryDB[tableName], resource) } else { c.overwrite(table, resource) @@ -174,7 +171,7 @@ func (c *client) Write(ctx context.Context, _ schema.Tables, resources <-chan ar return nil } -func (c *client) WriteTableBatch(ctx context.Context, table *schema.Table, resources []arrow.Record) error { +func (c *client) WriteTableBatch(ctx context.Context, table *schema.Table, writeMode WriteMode, resources []arrow.Record) error { if c.errOnWrite { return fmt.Errorf("errOnWrite") } @@ -188,7 +185,7 @@ func (c *client) WriteTableBatch(ctx context.Context, table *schema.Table, resou tableName := table.Name for _, resource := range resources { c.memoryDBLock.Lock() - if c.spec.WriteSpec.WriteMode == pbPlugin.WRITE_MODE_WRITE_MODE_APPEND { + if writeMode == WriteModeAppend { c.memoryDB[tableName] = append(c.memoryDB[tableName], resource) } else { c.overwrite(table, resource) diff --git a/plugin/memdb_test.go b/plugin/memdb_test.go index 4cfa954f1f..d89a70ac0d 100644 --- a/plugin/memdb_test.go +++ b/plugin/memdb_test.go @@ -9,8 +9,6 @@ import ( pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/google/uuid" - "github.com/rs/zerolog" - "github.com/stretchr/testify/require" ) var migrateStrategyOverwrite = MigrateStrategy{ @@ -179,32 +177,3 @@ func TestOnWriteCtxCancelled(t *testing.T) { t.Fatal(err) } } - -func TestPluginInit(t *testing.T) { - const ( - batchSize = uint64(100) - batchSizeBytes = uint64(1000) - ) - - var ( - batchSizeObserved uint64 - batchSizeBytesObserved uint64 - ) - p := NewPlugin( - "test", - "development", - func(ctx context.Context, logger zerolog.Logger, s pbPlugin.Spec) (Client, error) { - batchSizeObserved = s.WriteSpec.BatchSize - batchSizeBytesObserved = s.WriteSpec.BatchSizeBytes - return NewMemDBClient(ctx, logger, s) - }, - WithDefaultBatchSize(int(batchSize)), - WithDefaultBatchSizeBytes(int(batchSizeBytes)), - ) - require.NoError(t, p.Init(context.TODO(), pbPlugin.Spec{ - WriteSpec: &pbPlugin.WriteSpec{}, - })) - - require.Equal(t, batchSize, batchSizeObserved) - require.Equal(t, batchSizeBytes, batchSizeBytesObserved) -} diff --git a/plugin/options.go b/plugin/options.go index 2a226724e0..fa2b3b53f2 100644 --- a/plugin/options.go +++ b/plugin/options.go @@ -1,12 +1,74 @@ package plugin import ( + "bytes" "context" "time" "github.com/cloudquery/plugin-sdk/v4/schema" ) +type MigrateMode int + +const ( + MigrateModeSafe MigrateMode = iota + MigrateModeForced +) + +var ( + migrateModeStrings = []string{"safe", "forced"} +) + +func (m MigrateMode) String() string { + return migrateModeStrings[m] +} + +type WriteMode int + +const ( + WriteModeOverwriteDeleteStale WriteMode = iota + WriteModeOverwrite + WriteModeAppend +) + +var ( + writeModeStrings = []string{"overwrite-delete-stale", "overwrite", "append"} +) + +func (m WriteMode) String() string { + return writeModeStrings[m] +} + +type Scheduler int + +const ( + SchedulerDFS Scheduler = iota + SchedulerRoundRobin +) + +var AllSchedulers = Schedulers{SchedulerDFS, SchedulerRoundRobin} +var AllSchedulerNames = [...]string{ + SchedulerDFS: "dfs", + SchedulerRoundRobin: "round-robin", +} + +type Schedulers []Scheduler + +func (s Schedulers) String() string { + var buffer bytes.Buffer + for i, scheduler := range s { + if i > 0 { + buffer.WriteString(", ") + } + buffer.WriteString(scheduler.String()) + } + return buffer.String() +} + +func (s Scheduler) String() string { + return AllSchedulerNames[s] +} + type GetTables func(ctx context.Context, c Client) (schema.Tables, error) type Option func(*Plugin) @@ -25,9 +87,9 @@ func WithNoInternalColumns() Option { } } -func WithUnmanaged() Option { +func WithUnmanagedSync() Option { return func(p *Plugin) { - p.unmanaged = true + p.unmanagedSync = true } } diff --git a/plugin/plugin.go b/plugin/plugin.go index dc254f0827..754fd424d6 100644 --- a/plugin/plugin.go +++ b/plugin/plugin.go @@ -7,17 +7,12 @@ import ( "time" "github.com/apache/arrow/go/v13/arrow" - "github.com/apache/arrow/go/v13/arrow/array" - "github.com/apache/arrow/go/v13/arrow/memory" - "github.com/cloudquery/plugin-pb-go/specs" + "github.com/cloudquery/plugin-pb-go/specs/v0" "github.com/cloudquery/plugin-sdk/v4/backend" "github.com/cloudquery/plugin-sdk/v4/caser" - "github.com/cloudquery/plugin-sdk/v4/scalar" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/rs/zerolog" "golang.org/x/sync/semaphore" - - pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" ) const ( @@ -32,14 +27,14 @@ type Options struct { type NewExecutionClientFunc func(context.Context, zerolog.Logger, specs.Source, Options) (schema.ClientMeta, error) -type NewClientFunc func(context.Context, zerolog.Logger, pbPlugin.Spec) (Client, error) +type NewClientFunc func(context.Context, zerolog.Logger, any) (Client, error) type Client interface { ID() string - Sync(ctx context.Context, metrics *Metrics, res chan<- arrow.Record) error - Migrate(ctx context.Context, tables schema.Tables) error - WriteTableBatch(ctx context.Context, table *schema.Table, data []arrow.Record) error - Write(ctx context.Context, tables schema.Tables, res <-chan arrow.Record) error + Sync(ctx context.Context, res chan<- arrow.Record) error + Migrate(ctx context.Context, tables schema.Tables, migrateMode MigrateMode) error + WriteTableBatch(ctx context.Context, table *schema.Table, writeMode WriteMode, data []arrow.Record) error + Write(ctx context.Context, tables schema.Tables, writeMode WriteMode, res <-chan arrow.Record) error DeleteStale(ctx context.Context, tables schema.Tables, sourceName string, syncTime time.Time) error Read(ctx context.Context, table *schema.Table, sourceName string, res chan<- arrow.Record) error Close(ctx context.Context) error @@ -65,7 +60,7 @@ func (UnimplementedWriter) DeleteStale(ctx context.Context, tables schema.Tables type UnimplementedSync struct{} -func (UnimplementedSync) Sync(ctx context.Context, metrics *Metrics, res chan<- arrow.Record) error { +func (UnimplementedSync) Sync(ctx context.Context, res chan<- arrow.Record) error { return fmt.Errorf("not implemented") } @@ -110,15 +105,17 @@ type Plugin struct { // backend is the backend used to store the cursor state backend backend.Backend // spec is the spec the client was initialized with - spec pbPlugin.Spec + spec any // NoInternalColumns if set to true will not add internal columns to tables such as _cq_id and _cq_parent_id // useful for sources such as PostgreSQL and other databases internalColumns bool - // unmanaged if set to true then the plugin will call Sync directly and not use the scheduler - unmanaged bool + // unmanagedSync if set to true then the plugin will call Sync directly and not use the scheduler + unmanagedSync bool // titleTransformer allows the plugin to control how table names get turned into titles for generated documentation - titleTransformer func(*schema.Table) string - syncTime time.Time + titleTransformer func(*schema.Table) string + syncTime time.Time + sourceName string + deterministicCQId bool managedWriter bool workers map[string]*worker @@ -146,7 +143,7 @@ func (p *Plugin) addInternalColumns(tables []*schema.Table) error { cqSourceName := schema.CqSourceNameColumn cqSyncTime := schema.CqSyncTimeColumn cqSourceName.Resolver = func(_ context.Context, _ schema.ClientMeta, resource *schema.Resource, c schema.Column) error { - return resource.Set(c.Name, p.spec.Name) + return resource.Set(c.Name, p.sourceName) } cqSyncTime.Resolver = func(_ context.Context, _ schema.ClientMeta, resource *schema.Resource, c schema.Column) error { return resource.Set(c.Name, p.syncTime) @@ -197,6 +194,8 @@ func maxDepth(tables schema.Tables) uint64 { return depth } +// NewPlugin returns a new CloudQuery Plugin with the given name, version and implementation. +// Depending on the options, it can be write only plugin, read only plugin or both. func NewPlugin(name string, version string, newClient NewClientFunc, options ...Option) *Plugin { p := Plugin{ name: name, @@ -206,6 +205,7 @@ func NewPlugin(name string, version string, newClient NewClientFunc, options ... titleTransformer: DefaultTitleTransformer, newClient: newClient, metrics: &Metrics{TableClient: make(map[string]map[string]*TableClientMetrics)}, + workers: make(map[string]*worker), workersLock: &sync.Mutex{}, batchTimeout: time.Duration(defaultBatchTimeoutSeconds) * time.Second, defaultBatchSize: defaultBatchSize, @@ -215,6 +215,10 @@ func NewPlugin(name string, version string, newClient NewClientFunc, options ... opt(&p) } if p.staticTables != nil { + setParents(p.staticTables, nil) + if err := transformTables(p.staticTables); err != nil { + panic(err) + } if p.internalColumns { if err := p.addInternalColumns(p.staticTables); err != nil { panic(err) @@ -246,66 +250,16 @@ func (p *Plugin) SetLogger(logger zerolog.Logger) { p.logger = logger.With().Str("module", p.name+"-src").Logger() } -// Tables returns all tables supported by this source plugin -func (p *Plugin) StaticTables() schema.Tables { - return p.staticTables -} - -func (p *Plugin) HasDynamicTables() bool { - return p.getDynamicTables != nil -} - -func (p *Plugin) DynamicTables() schema.Tables { - return p.sessionTables -} - -func (p *Plugin) readAll(ctx context.Context, table *schema.Table, sourceName string) ([]arrow.Record, error) { - var readErr error - ch := make(chan arrow.Record) - go func() { - defer close(ch) - readErr = p.Read(ctx, table, sourceName, ch) - }() - // nolint:prealloc - var resources []arrow.Record - for resource := range ch { - resources = append(resources, resource) - } - return resources, readErr -} - -func (p *Plugin) Read(ctx context.Context, table *schema.Table, sourceName string, res chan<- arrow.Record) error { - return p.client.Read(ctx, table, sourceName, res) -} - func (p *Plugin) Metrics() *Metrics { return p.metrics } -func (p *Plugin) setSpecDefaults(spec *pbPlugin.Spec) { - if spec.WriteSpec == nil { - spec.WriteSpec = &pbPlugin.WriteSpec{ - BatchSize: uint64(p.defaultBatchSize), - BatchSizeBytes: uint64(p.defaultBatchSizeBytes), - } - } - if spec.WriteSpec.BatchSize == 0 { - spec.WriteSpec.BatchSize = uint64(p.defaultBatchSize) - } - if spec.WriteSpec.BatchSizeBytes == 0 { - spec.WriteSpec.BatchSizeBytes = uint64(p.defaultBatchSizeBytes) - } - if spec.SyncSpec == nil { - spec.SyncSpec = &pbPlugin.SyncSpec{} - } -} - -func (p *Plugin) Init(ctx context.Context, spec pbPlugin.Spec) error { +// Init initializes the plugin with the given spec. +func (p *Plugin) Init(ctx context.Context, spec any) error { if !p.mu.TryLock() { return fmt.Errorf("plugin already in use") } defer p.mu.Unlock() - p.setSpecDefaults(&spec) var err error p.client, err = p.newClient(ctx, p.logger, spec) if err != nil { @@ -313,167 +267,6 @@ func (p *Plugin) Init(ctx context.Context, spec pbPlugin.Spec) error { } p.spec = spec - tables := p.staticTables - if p.getDynamicTables != nil { - tables, err = p.getDynamicTables(ctx, p.client) - if err != nil { - return fmt.Errorf("failed to get dynamic tables: %w", err) - } - - tables, err = tables.FilterDfs(spec.SyncSpec.Tables, spec.SyncSpec.SkipTables, true) - if err != nil { - return fmt.Errorf("failed to filter tables: %w", err) - } - if len(tables) == 0 { - return fmt.Errorf("no tables to sync - please check your spec 'tables' and 'skip_tables' settings") - } - - setParents(tables, nil) - if err := transformTables(tables); err != nil { - return err - } - if p.internalColumns { - if err := p.addInternalColumns(tables); err != nil { - return err - } - } - - p.maxDepth = maxDepth(tables) - if p.maxDepth > maxAllowedDepth { - return fmt.Errorf("max depth of tables is %d, max allowed is %d", p.maxDepth, maxAllowedDepth) - } - } else if tables != nil { - tables, err = tables.FilterDfs(spec.SyncSpec.Tables, spec.SyncSpec.SkipTables, true) - if err != nil { - return fmt.Errorf("failed to filter tables: %w", err) - } - } - p.sessionTables = tables - - return nil -} - -func (p *Plugin) Migrate(ctx context.Context, tables schema.Tables) error { - return p.client.Migrate(ctx, tables) -} - -func (p *Plugin) writeUnmanaged(ctx context.Context, _ specs.Source, tables schema.Tables, _ time.Time, res <-chan arrow.Record) error { - return p.client.Write(ctx, tables, res) -} - -// this function is currently used mostly for testing so it's not a public api -func (p *Plugin) writeOne(ctx context.Context, sourceSpec pbPlugin.Spec, syncTime time.Time, resource arrow.Record) error { - resources := []arrow.Record{resource} - return p.writeAll(ctx, sourceSpec, syncTime, resources) -} - -// this function is currently used mostly for testing so it's not a public api -func (p *Plugin) writeAll(ctx context.Context, sourceSpec pbPlugin.Spec, syncTime time.Time, resources []arrow.Record) error { - ch := make(chan arrow.Record, len(resources)) - for _, resource := range resources { - ch <- resource - } - close(ch) - tables := make(schema.Tables, 0) - tableNames := make(map[string]struct{}) - for _, resource := range resources { - sc := resource.Schema() - tableMD := sc.Metadata() - name, found := tableMD.GetValue(schema.MetadataTableName) - if !found { - return fmt.Errorf("missing table name") - } - if _, ok := tableNames[name]; ok { - continue - } - table, err := schema.NewTableFromArrowSchema(resource.Schema()) - if err != nil { - return err - } - tables = append(tables, table) - tableNames[table.Name] = struct{}{} - } - return p.Write(ctx, sourceSpec, tables, syncTime, ch) -} - -func (p *Plugin) Write(ctx context.Context, sourceSpec pbPlugin.Spec, tables schema.Tables, syncTime time.Time, res <-chan arrow.Record) error { - syncTime = syncTime.UTC() - if err := p.client.Write(ctx, tables, res); err != nil { - return err - } - if p.spec.WriteSpec.WriteMode == pbPlugin.WRITE_MODE_WRITE_MODE_OVERWRITE_DELETE_STALE { - tablesToDelete := tables - if sourceSpec.BackendSpec != nil { - tablesToDelete = make(schema.Tables, 0, len(tables)) - for _, t := range tables { - if !t.IsIncremental { - tablesToDelete = append(tablesToDelete, t) - } - } - } - if err := p.DeleteStale(ctx, tablesToDelete, sourceSpec.Name, syncTime); err != nil { - return err - } - } - return nil -} - -func (p *Plugin) DeleteStale(ctx context.Context, tables schema.Tables, sourceName string, syncTime time.Time) error { - syncTime = syncTime.UTC() - return p.client.DeleteStale(ctx, tables, sourceName, syncTime) -} - -func (p *Plugin) syncAll(ctx context.Context, syncTime time.Time, syncSpec pbPlugin.SyncSpec) ([]arrow.Record, error) { - var err error - ch := make(chan arrow.Record) - go func() { - defer close(ch) - err = p.Sync(ctx, syncTime, syncSpec, ch) - }() - // nolint:prealloc - var resources []arrow.Record - for resource := range ch { - resources = append(resources, resource) - } - return resources, err -} - -// Sync is syncing data from the requested tables in spec to the given channel -func (p *Plugin) Sync(ctx context.Context, syncTime time.Time, syncSpec pbPlugin.SyncSpec, res chan<- arrow.Record) error { - if !p.mu.TryLock() { - return fmt.Errorf("plugin already in use") - } - defer p.mu.Unlock() - p.syncTime = syncTime - - startTime := time.Now() - if p.unmanaged { - if err := p.client.Sync(ctx, p.metrics, res); err != nil { - return fmt.Errorf("failed to sync unmanaged client: %w", err) - } - } else { - resources := make(chan *schema.Resource) - go func() { - defer close(resources) - switch syncSpec.Scheduler { - case pbPlugin.SyncSpec_SCHEDULER_DFS: - p.syncDfs(ctx, syncSpec, p.client, p.sessionTables, resources) - case pbPlugin.SyncSpec_SCHEDULER_ROUND_ROBIN: - p.syncRoundRobin(ctx, syncSpec, p.client, p.sessionTables, resources) - default: - panic(fmt.Errorf("unknown scheduler %s. Options are: %v", syncSpec.Scheduler, specs.AllSchedulers.String())) - } - }() - for resource := range resources { - vector := resource.GetValues() - bldr := array.NewRecordBuilder(memory.DefaultAllocator, resource.Table.ToArrowSchema()) - scalar.AppendToRecordBuilder(bldr, vector) - rec := bldr.NewRecord() - res <- rec - } - } - - p.logger.Info().Uint64("resources", p.metrics.TotalResources()).Uint64("errors", p.metrics.TotalErrors()).Uint64("panics", p.metrics.TotalPanics()).TimeDiff("duration", time.Now(), startTime).Msg("sync finished") return nil } diff --git a/plugin/plugin_managed_source_test.go b/plugin/plugin_managed_source_test.go index cdaf02e616..3c55c7d4ae 100644 --- a/plugin/plugin_managed_source_test.go +++ b/plugin/plugin_managed_source_test.go @@ -146,7 +146,7 @@ func (*testExecutionClient) Read(ctx context.Context, table *schema.Table, sourc return fmt.Errorf("not implemented") } -func (*testExecutionClient) Sync(ctx context.Context, metrics *Metrics, res chan<- arrow.Record) error { +func (*testExecutionClient) Sync(ctx context.Context, res chan<- arrow.Record) error { return fmt.Errorf("not implemented") } @@ -356,7 +356,7 @@ func (testRand) Read(p []byte) (n int, err error) { return len(p), nil } -func TestSync(t *testing.T) { +func TestManagedSync(t *testing.T) { uuid.SetRand(testRand{}) for _, scheduler := range pbPlugin.SyncSpec_SCHEDULER_value { for _, tc := range syncTestCases { @@ -428,20 +428,29 @@ func testSyncTable(t *testing.T, tc syncTestCase, scheduler pbPlugin.SyncSpec_SC } } -func TestIgnoredColumns(t *testing.T) { - validateResources(t, schema.Resources{{ - Item: struct{ A *string }{}, - Table: &schema.Table{ - Columns: schema.ColumnList{ - { - Name: "a", - Type: arrow.BinaryTypes.String, - IgnoreInTests: true, - }, - }, - }, - }}) -} +// func TestIgnoredColumns(t *testing.T) { +// table := &schema.Table{ +// Columns: schema.ColumnList{ +// { +// Name: "a", +// Type: arrow.BinaryTypes.String, +// IgnoreInTests: true, +// }, +// }, +// } +// validateResources(t, table, schema.Resources{{ +// Item: struct{ A *string }{}, +// Table: &schema.Table{ +// Columns: schema.ColumnList{ +// { +// Name: "a", +// Type: arrow.BinaryTypes.String, +// IgnoreInTests: true, +// }, +// }, +// }, +// }}) +// } var testTable struct { PrimaryKey string diff --git a/plugin/plugin_reader.go b/plugin/plugin_reader.go new file mode 100644 index 0000000000..8fadf11f6a --- /dev/null +++ b/plugin/plugin_reader.go @@ -0,0 +1,110 @@ +package plugin + +import ( + "context" + "fmt" + "time" + + "github.com/apache/arrow/go/v13/arrow" + "github.com/apache/arrow/go/v13/arrow/array" + "github.com/apache/arrow/go/v13/arrow/memory" + "github.com/cloudquery/plugin-sdk/v4/scalar" + "github.com/cloudquery/plugin-sdk/v4/schema" +) + +type SyncOptions struct { + Tables []string + SkipTables []string + Concurrency int64 + Scheduler Scheduler + DeterministicCQID bool +} + +// Tables returns all tables supported by this source plugin +func (p *Plugin) StaticTables() schema.Tables { + return p.staticTables +} + +func (p *Plugin) HasDynamicTables() bool { + return p.getDynamicTables != nil +} + +func (p *Plugin) DynamicTables() schema.Tables { + return p.sessionTables +} + +func (p *Plugin) readAll(ctx context.Context, table *schema.Table, sourceName string) ([]arrow.Record, error) { + var readErr error + ch := make(chan arrow.Record) + go func() { + defer close(ch) + readErr = p.Read(ctx, table, sourceName, ch) + }() + // nolint:prealloc + var resources []arrow.Record + for resource := range ch { + resources = append(resources, resource) + } + return resources, readErr +} + +func (p *Plugin) Read(ctx context.Context, table *schema.Table, sourceName string, res chan<- arrow.Record) error { + return p.client.Read(ctx, table, sourceName, res) +} + +func (p *Plugin) syncAll(ctx context.Context, sourceName string, syncTime time.Time, options SyncOptions) ([]arrow.Record, error) { + var err error + ch := make(chan arrow.Record) + go func() { + defer close(ch) + err = p.Sync(ctx, sourceName, syncTime, options, ch) + }() + // nolint:prealloc + var resources []arrow.Record + for resource := range ch { + resources = append(resources, resource) + } + return resources, err +} + +// Sync is syncing data from the requested tables in spec to the given channel +func (p *Plugin) Sync(ctx context.Context, sourceName string, syncTime time.Time, syncOptions SyncOptions, res chan<- arrow.Record) error { + if !p.mu.TryLock() { + return fmt.Errorf("plugin already in use") + } + defer p.mu.Unlock() + p.syncTime = syncTime + + startTime := time.Now() + if p.unmanagedSync { + if err := p.client.Sync(ctx, res); err != nil { + return fmt.Errorf("failed to sync unmanaged client: %w", err) + } + } else { + if len(p.sessionTables) == 0 { + return fmt.Errorf("no tables to sync - please check your spec 'tables' and 'skip_tables' settings") + } + resources := make(chan *schema.Resource) + go func() { + defer close(resources) + switch syncOptions.Scheduler { + case SchedulerDFS: + p.syncDfs(ctx, syncOptions, p.client, p.sessionTables, resources) + case SchedulerRoundRobin: + p.syncRoundRobin(ctx, syncOptions, p.client, p.sessionTables, resources) + default: + panic(fmt.Errorf("unknown scheduler %s", syncOptions.Scheduler)) + } + }() + for resource := range resources { + vector := resource.GetValues() + bldr := array.NewRecordBuilder(memory.DefaultAllocator, resource.Table.ToArrowSchema()) + scalar.AppendToRecordBuilder(bldr, vector) + rec := bldr.NewRecord() + res <- rec + } + } + + p.logger.Info().Uint64("resources", p.metrics.TotalResources()).Uint64("errors", p.metrics.TotalErrors()).Uint64("panics", p.metrics.TotalPanics()).TimeDiff("duration", time.Now(), startTime).Msg("sync finished") + return nil +} diff --git a/plugin/plugin_round_robin_test.go b/plugin/plugin_round_robin_test.go deleted file mode 100644 index e24c15d108..0000000000 --- a/plugin/plugin_round_robin_test.go +++ /dev/null @@ -1,78 +0,0 @@ -package plugin - -import ( - "context" - "testing" - "time" - - "github.com/apache/arrow/go/v13/arrow/array" - pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" - "github.com/cloudquery/plugin-sdk/v4/schema" -) - -func TestPluginRoundRobin(t *testing.T) { - ctx := context.Background() - p := NewPlugin("test", "v0.0.0", NewMemDBClient, WithUnmanaged()) - testTable := schema.TestTable("test_table", schema.TestSourceOptions{}) - syncTime := time.Now().UTC() - testRecords := schema.GenTestData(testTable, schema.GenTestDataOptions{ - SourceName: "test", - SyncTime: syncTime, - MaxRows: 1, - }) - spec := pbPlugin.Spec{ - Name: "test", - Path: "cloudquery/test", - Version: "v1.0.0", - Registry: pbPlugin.Spec_REGISTRY_GITHUB, - WriteSpec: &pbPlugin.WriteSpec{}, - SyncSpec: &pbPlugin.SyncSpec{}, - } - if err := p.Init(ctx, spec); err != nil { - t.Fatal(err) - } - - if err := p.Migrate(ctx, schema.Tables{testTable}); err != nil { - t.Fatal(err) - } - if err := p.writeAll(ctx, spec, syncTime, testRecords); err != nil { - t.Fatal(err) - } - gotRecords, err := p.readAll(ctx, testTable, "test") - if err != nil { - t.Fatal(err) - } - if len(gotRecords) != len(testRecords) { - t.Fatalf("got %d records, want %d", len(gotRecords), len(testRecords)) - } - if !array.RecordEqual(testRecords[0], gotRecords[0]) { - t.Fatal("records are not equal") - } - records, err := p.syncAll(ctx, syncTime, *spec.SyncSpec) - if err != nil { - t.Fatal(err) - } - if len(records) != 1 { - t.Fatalf("got %d resources, want 1", len(records)) - } - - if !array.RecordEqual(testRecords[0], records[0]) { - t.Fatal("records are not equal") - } - - newSyncTime := time.Now().UTC() - if err := p.DeleteStale(ctx, schema.Tables{testTable}, "test", newSyncTime); err != nil { - t.Fatal(err) - } - records, err = p.syncAll(ctx, syncTime, *spec.SyncSpec) - if err != nil { - t.Fatal(err) - } - if len(records) != 0 { - t.Fatalf("got %d resources, want 0", len(records)) - } - - if err := p.Close(ctx); err != nil { - t.Fatal(err) - } -} diff --git a/plugin/plugin_test.go b/plugin/plugin_test.go new file mode 100644 index 0000000000..04993d5b97 --- /dev/null +++ b/plugin/plugin_test.go @@ -0,0 +1,97 @@ +package plugin + +import ( + "context" + "testing" + "time" + + "github.com/apache/arrow/go/v13/arrow/array" + "github.com/cloudquery/plugin-sdk/v4/schema" +) + +func TestPluginUnmanagedSync(t *testing.T) { + ctx := context.Background() + p := NewPlugin("test", "v0.0.0", NewMemDBClient, WithUnmanagedSync()) + testTable := schema.TestTable("test_table", schema.TestSourceOptions{}) + syncTime := time.Now().UTC() + sourceName := "test" + testRecords := schema.GenTestData(testTable, schema.GenTestDataOptions{ + SourceName: sourceName, + SyncTime: syncTime, + MaxRows: 1, + }) + if err := p.Init(ctx, nil); err != nil { + t.Fatal(err) + } + + if err := p.Migrate(ctx, schema.Tables{testTable}, MigrateModeSafe); err != nil { + t.Fatal(err) + } + if err := p.writeAll(ctx, sourceName, syncTime, WriteModeOverwrite, testRecords); err != nil { + t.Fatal(err) + } + gotRecords, err := p.readAll(ctx, testTable, "test") + if err != nil { + t.Fatal(err) + } + if len(gotRecords) != len(testRecords) { + t.Fatalf("got %d records, want %d", len(gotRecords), len(testRecords)) + } + if !array.RecordEqual(testRecords[0], gotRecords[0]) { + t.Fatal("records are not equal") + } + records, err := p.syncAll(ctx, sourceName, syncTime, SyncOptions{}) + if err != nil { + t.Fatal(err) + } + if len(records) != 1 { + t.Fatalf("got %d resources, want 1", len(records)) + } + + if !array.RecordEqual(testRecords[0], records[0]) { + t.Fatal("records are not equal") + } + + newSyncTime := time.Now().UTC() + if err := p.DeleteStale(ctx, schema.Tables{testTable}, "test", newSyncTime); err != nil { + t.Fatal(err) + } + records, err = p.syncAll(ctx, sourceName, syncTime, SyncOptions{}) + if err != nil { + t.Fatal(err) + } + if len(records) != 0 { + t.Fatalf("got %d resources, want 0", len(records)) + } + + if err := p.Close(ctx); err != nil { + t.Fatal(err) + } +} + +// func TestPluginInit(t *testing.T) { +// const ( +// batchSize = uint64(100) +// batchSizeBytes = uint64(1000) +// ) + +// var ( +// batchSizeObserved uint64 +// batchSizeBytesObserved uint64 +// ) +// p := NewPlugin( +// "test", +// "development", +// func(ctx context.Context, logger zerolog.Logger, s any) (Client, error) { +// batchSizeObserved = s.WriteSpec.BatchSize +// batchSizeBytesObserved = s.WriteSpec.BatchSizeBytes +// return NewMemDBClient(ctx, logger, s) +// }, +// WithDefaultBatchSize(int(batchSize)), +// WithDefaultBatchSizeBytes(int(batchSizeBytes)), +// ) +// require.NoError(t, p.Init(context.TODO(), nil)) + +// require.Equal(t, batchSize, batchSizeObserved) +// require.Equal(t, batchSizeBytes, batchSizeBytesObserved) +// } diff --git a/plugin/plugin_writer.go b/plugin/plugin_writer.go new file mode 100644 index 0000000000..4f13e1bdab --- /dev/null +++ b/plugin/plugin_writer.go @@ -0,0 +1,69 @@ +package plugin + +import ( + "context" + "fmt" + "time" + + "github.com/apache/arrow/go/v13/arrow" + "github.com/cloudquery/plugin-sdk/v4/schema" +) + +func (p *Plugin) Migrate(ctx context.Context, tables schema.Tables, migrateMode MigrateMode) error { + return p.client.Migrate(ctx, tables, migrateMode) +} + +// this function is currently used mostly for testing so it's not a public api +func (p *Plugin) writeOne(ctx context.Context, sourceName string, syncTime time.Time, writeMode WriteMode, resource arrow.Record) error { + resources := []arrow.Record{resource} + return p.writeAll(ctx, sourceName, syncTime, writeMode, resources) +} + +// this function is currently used mostly for testing so it's not a public api +func (p *Plugin) writeAll(ctx context.Context, sourceName string, syncTime time.Time, writeMode WriteMode, resources []arrow.Record) error { + ch := make(chan arrow.Record, len(resources)) + for _, resource := range resources { + ch <- resource + } + close(ch) + tables := make(schema.Tables, 0) + tableNames := make(map[string]struct{}) + for _, resource := range resources { + sc := resource.Schema() + tableMD := sc.Metadata() + name, found := tableMD.GetValue(schema.MetadataTableName) + if !found { + return fmt.Errorf("missing table name") + } + if _, ok := tableNames[name]; ok { + continue + } + table, err := schema.NewTableFromArrowSchema(resource.Schema()) + if err != nil { + return err + } + tables = append(tables, table) + tableNames[table.Name] = struct{}{} + } + return p.Write(ctx, sourceName, tables, syncTime, writeMode, ch) +} + +func (p *Plugin) Write(ctx context.Context, sourceName string, tables schema.Tables, syncTime time.Time, writeMode WriteMode, res <-chan arrow.Record) error { + syncTime = syncTime.UTC() + if p.managedWriter { + if err := p.writeManagedTableBatch(ctx, tables, writeMode, res); err != nil { + return err + } + } else { + if err := p.client.Write(ctx, tables, writeMode, res); err != nil { + return err + } + } + + return nil +} + +func (p *Plugin) DeleteStale(ctx context.Context, tables schema.Tables, sourceName string, syncTime time.Time) error { + syncTime = syncTime.UTC() + return p.client.DeleteStale(ctx, tables, sourceName, syncTime) +} diff --git a/plugin/scheduler_dfs.go b/plugin/scheduler_dfs.go index 9b592be865..ae074503ab 100644 --- a/plugin/scheduler_dfs.go +++ b/plugin/scheduler_dfs.go @@ -8,17 +8,16 @@ import ( "sync" "sync/atomic" - pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" "github.com/cloudquery/plugin-sdk/v4/helpers" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/getsentry/sentry-go" "golang.org/x/sync/semaphore" ) -func (p *Plugin) syncDfs(ctx context.Context, spec pbPlugin.SyncSpec, client Client, tables schema.Tables, resolvedResources chan<- *schema.Resource) { +func (p *Plugin) syncDfs(ctx context.Context, options SyncOptions, client Client, tables schema.Tables, resolvedResources chan<- *schema.Resource) { // This is very similar to the concurrent web crawler problem with some minor changes. // We are using DFS to make sure memory usage is capped at O(h) where h is the height of the tree. - tableConcurrency := max(spec.Concurrency/minResourceConcurrency, minTableConcurrency) + tableConcurrency := max(uint64(options.Concurrency/minResourceConcurrency), minTableConcurrency) resourceConcurrency := tableConcurrency * minResourceConcurrency p.tableSems = make([]*semaphore.Weighted, p.maxDepth) @@ -173,7 +172,7 @@ func (p *Plugin) resolveResourcesDfs(ctx context.Context, table *schema.Table, c return } - if err := resolvedResource.CalculateCQID(p.spec.SyncSpec.DetrministicCqId); err != nil { + if err := resolvedResource.CalculateCQID(p.deterministicCQId); err != nil { tableMetrics := p.metrics.TableClient[table.Name][client.ID()] p.logger.Error().Err(err).Str("table", table.Name).Str("client", client.ID()).Msg("resource resolver finished with primary key calculation error") if _, found := sentValidationErrors.LoadOrStore(table.Name, struct{}{}); !found { diff --git a/plugin/scheduler_round_robin.go b/plugin/scheduler_round_robin.go index 5c6e90b391..b4c7592fcf 100644 --- a/plugin/scheduler_round_robin.go +++ b/plugin/scheduler_round_robin.go @@ -4,7 +4,6 @@ import ( "context" "sync" - pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" "github.com/cloudquery/plugin-sdk/v4/schema" "golang.org/x/sync/semaphore" ) @@ -14,8 +13,8 @@ type tableClient struct { client schema.ClientMeta } -func (p *Plugin) syncRoundRobin(ctx context.Context, spec pbPlugin.SyncSpec, client Client, tables schema.Tables, resolvedResources chan<- *schema.Resource) { - tableConcurrency := max(spec.Concurrency/minResourceConcurrency, minTableConcurrency) +func (p *Plugin) syncRoundRobin(ctx context.Context, options SyncOptions, client Client, tables schema.Tables, resolvedResources chan<- *schema.Resource) { + tableConcurrency := max(uint64(options.Concurrency/minResourceConcurrency), minTableConcurrency) resourceConcurrency := tableConcurrency * minResourceConcurrency p.tableSems = make([]*semaphore.Weighted, p.maxDepth) diff --git a/plugin/testing_overwrite_deletestale.go b/plugin/testing_overwrite_deletestale.go index 3b2266d080..300900e287 100644 --- a/plugin/testing_overwrite_deletestale.go +++ b/plugin/testing_overwrite_deletestale.go @@ -6,19 +6,18 @@ import ( "time" "github.com/apache/arrow/go/v13/arrow/array" - pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/cloudquery/plugin-sdk/v4/types" "github.com/google/uuid" "github.com/rs/zerolog" ) -func (*PluginTestSuite) destinationPluginTestWriteOverwriteDeleteStale(ctx context.Context, p *Plugin, logger zerolog.Logger, spec pbPlugin.Spec, testOpts PluginTestSuiteRunnerOptions) error { - spec.WriteSpec.WriteMode = pbPlugin.WRITE_MODE_WRITE_MODE_OVERWRITE_DELETE_STALE +func (*PluginTestSuite) destinationPluginTestWriteOverwriteDeleteStale(ctx context.Context, p *Plugin, logger zerolog.Logger, spec any, testOpts PluginTestSuiteRunnerOptions) error { + writeMode := WriteModeOverwriteDeleteStale if err := p.Init(ctx, spec); err != nil { return fmt.Errorf("failed to init plugin: %w", err) } - tableName := fmt.Sprintf("cq_%s_%d", spec.Name, time.Now().Unix()) + tableName := fmt.Sprintf("cq_overwrite_delete_stale_%d", time.Now().Unix()) table := schema.TestTable(tableName, testOpts.TestSourceOptions) incTable := schema.TestTable(tableName+"_incremental", testOpts.TestSourceOptions) incTable.IsIncremental = true @@ -27,18 +26,11 @@ func (*PluginTestSuite) destinationPluginTestWriteOverwriteDeleteStale(ctx conte table, incTable, } - if err := p.Migrate(ctx, tables); err != nil { + if err := p.Migrate(ctx, tables, MigrateModeSafe); err != nil { return fmt.Errorf("failed to migrate tables: %w", err) } sourceName := "testOverwriteSource" + uuid.NewString() - sourceSpec := pbPlugin.Spec{ - Name: sourceName, - BackendSpec: &pbPlugin.Spec{ - Name: "local", - Path: "cloudquery/local", - }, - } opts := schema.GenTestDataOptions{ SourceName: sourceName, @@ -50,7 +42,7 @@ func (*PluginTestSuite) destinationPluginTestWriteOverwriteDeleteStale(ctx conte incResources := schema.GenTestData(incTable, opts) allResources := resources allResources = append(allResources, incResources...) - if err := p.writeAll(ctx, sourceSpec, syncTime, allResources); err != nil { + if err := p.writeAll(ctx, sourceName, syncTime, writeMode, allResources); err != nil { return fmt.Errorf("failed to write all: %w", err) } sortRecordsBySyncTime(table, resources) @@ -103,7 +95,7 @@ func (*PluginTestSuite) destinationPluginTestWriteOverwriteDeleteStale(ctx conte allUpdatedResources := updatedResources allUpdatedResources = append(allUpdatedResources, updatedIncResources...) - if err := p.writeAll(ctx, sourceSpec, secondSyncTime, allUpdatedResources); err != nil { + if err := p.writeAll(ctx, sourceName, secondSyncTime, writeMode, allUpdatedResources); err != nil { return fmt.Errorf("failed to write all second time: %w", err) } diff --git a/plugin/testing_sync.go b/plugin/testing_sync.go index 0c0d6f939e..59b1cd5a9e 100644 --- a/plugin/testing_sync.go +++ b/plugin/testing_sync.go @@ -9,13 +9,12 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/array" - pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" "github.com/cloudquery/plugin-sdk/v4/schema" ) -type Validator func(t *testing.T, plugin *Plugin, resources []*schema.Resource) +type Validator func(t *testing.T, plugin *Plugin, resources []arrow.Record) -func TestPluginSync(t *testing.T, plugin *Plugin, spec pbPlugin.Spec, opts ...TestPluginOption) { +func TestPluginSync(t *testing.T, plugin *Plugin, sourceName string, spec any, options SyncOptions, opts ...TestPluginOption) { t.Helper() o := &testPluginOptions{ @@ -38,7 +37,7 @@ func TestPluginSync(t *testing.T, plugin *Plugin, spec pbPlugin.Spec, opts ...Te go func() { defer close(resourcesChannel) - syncErr = plugin.Sync(context.Background(), time.Now(), *spec.SyncSpec, resourcesChannel) + syncErr = plugin.Sync(context.Background(), sourceName, time.Now(), options, resourcesChannel) }() syncedResources := make([]arrow.Record, 0) @@ -48,9 +47,9 @@ func TestPluginSync(t *testing.T, plugin *Plugin, spec pbPlugin.Spec, opts ...Te if syncErr != nil { t.Fatal(syncErr) } - // for _, validator := range o.validators { - // validator(t, plugin, syncedResources) - // } + for _, validator := range o.validators { + validator(t, plugin, syncedResources) + } } type TestPluginOption func(*testPluginOptions) @@ -72,13 +71,18 @@ type testPluginOptions struct { validators []Validator } -func getTableResources(t *testing.T, table *schema.Table, resources []*schema.Resource) []*schema.Resource { +func getTableResources(t *testing.T, table *schema.Table, resources []arrow.Record) []arrow.Record { t.Helper() - tableResources := make([]*schema.Resource, 0) + tableResources := make([]arrow.Record, 0) for _, resource := range resources { - if resource.Table.Name == table.Name { + md := resource.Schema().Metadata() + tableName, ok := md.GetValue(schema.MetadataTableName) + if !ok { + t.Errorf("Expected table name to be set in metadata") + } + if tableName == table.Name { tableResources = append(tableResources, resource) } } @@ -86,17 +90,17 @@ func getTableResources(t *testing.T, table *schema.Table, resources []*schema.Re return tableResources } -func validateTable(t *testing.T, table *schema.Table, resources []*schema.Resource) { +func validateTable(t *testing.T, table *schema.Table, resources []arrow.Record) { t.Helper() tableResources := getTableResources(t, table, resources) if len(tableResources) == 0 { t.Errorf("Expected table %s to be synced but it was not found", table.Name) return } - validateResources(t, tableResources) + validateResources(t, table, tableResources) } -func validatePlugin(t *testing.T, plugin *Plugin, resources []*schema.Resource) { +func validatePlugin(t *testing.T, plugin *Plugin, resources []arrow.Record) { t.Helper() tables := extractTables(plugin.staticTables) for _, table := range tables { @@ -115,21 +119,18 @@ func extractTables(tables schema.Tables) []*schema.Table { // Validates that every column has at least one non-nil value. // Also does some additional validations. -func validateResources(t *testing.T, resources []*schema.Resource) { +func validateResources(t *testing.T, table *schema.Table, resources []arrow.Record) { t.Helper() - table := resources[0].Table - // A set of column-names that have values in at least one of the resources. columnsWithValues := make([]bool, len(table.Columns)) for _, resource := range resources { - for i, value := range resource.GetValues() { - if value == nil { - continue - } - if value.IsValid() { - columnsWithValues[i] = true + for _, arr := range resource.Columns() { + for i := 0; i < arr.Len(); i++ { + if arr.IsValid(i) { + columnsWithValues[i] = true + } } } } diff --git a/plugin/testing_write.go b/plugin/testing_write.go index 8f2b3da285..501ff39273 100644 --- a/plugin/testing_write.go +++ b/plugin/testing_write.go @@ -10,7 +10,6 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/array" - pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/cloudquery/plugin-sdk/v4/types" "github.com/rs/zerolog" @@ -22,11 +21,11 @@ type PluginTestSuite struct { // MigrateStrategy defines which tests we should include type MigrateStrategy struct { - AddColumn pbPlugin.WriteSpec_MIGRATE_MODE - AddColumnNotNull pbPlugin.WriteSpec_MIGRATE_MODE - RemoveColumn pbPlugin.WriteSpec_MIGRATE_MODE - RemoveColumnNotNull pbPlugin.WriteSpec_MIGRATE_MODE - ChangeColumn pbPlugin.WriteSpec_MIGRATE_MODE + AddColumn MigrateMode + AddColumnNotNull MigrateMode + RemoveColumn MigrateMode + RemoveColumnNotNull MigrateMode + ChangeColumn MigrateMode } type PluginTestSuiteTests struct { @@ -167,12 +166,8 @@ func WithTestSourceSkipDecimals() func(o *PluginTestSuiteRunnerOptions) { } } -func PluginTestSuiteRunner(t *testing.T, newPlugin NewPluginFunc, destSpec pbPlugin.Spec, tests PluginTestSuiteTests, testOptions ...func(o *PluginTestSuiteRunnerOptions)) { +func PluginTestSuiteRunner(t *testing.T, newPlugin NewPluginFunc, spec any, tests PluginTestSuiteTests, testOptions ...func(o *PluginTestSuiteRunnerOptions)) { t.Helper() - destSpec.Name = "testsuite" - if destSpec.WriteSpec == nil { - destSpec.WriteSpec = &pbPlugin.WriteSpec{} - } suite := &PluginTestSuite{ tests: tests, } @@ -194,9 +189,8 @@ func PluginTestSuiteRunner(t *testing.T, newPlugin NewPluginFunc, destSpec pbPlu if suite.tests.SkipOverwrite { t.Skip("skipping " + t.Name()) } - destSpec.Name = "test_write_overwrite" p := newPlugin() - if err := suite.destinationPluginTestWriteOverwrite(ctx, p, logger, destSpec, opts); err != nil { + if err := suite.destinationPluginTestWriteOverwrite(ctx, p, logger, spec, opts); err != nil { t.Fatal(err) } if err := p.Close(ctx); err != nil { @@ -209,9 +203,8 @@ func PluginTestSuiteRunner(t *testing.T, newPlugin NewPluginFunc, destSpec pbPlu if suite.tests.SkipOverwrite || suite.tests.SkipDeleteStale { t.Skip("skipping " + t.Name()) } - destSpec.Name = "test_write_overwrite_delete_stale" p := newPlugin() - if err := suite.destinationPluginTestWriteOverwriteDeleteStale(ctx, p, logger, destSpec, opts); err != nil { + if err := suite.destinationPluginTestWriteOverwriteDeleteStale(ctx, p, logger, spec, opts); err != nil { t.Fatal(err) } if err := p.Close(ctx); err != nil { @@ -224,10 +217,9 @@ func PluginTestSuiteRunner(t *testing.T, newPlugin NewPluginFunc, destSpec pbPlu if suite.tests.SkipMigrateOverwrite { t.Skip("skipping " + t.Name()) } - destSpec.WriteSpec.WriteMode = pbPlugin.WRITE_MODE_WRITE_MODE_OVERWRITE - destSpec.WriteSpec.MigrateMode = pbPlugin.WriteSpec_SAFE - destSpec.Name = "test_migrate_overwrite" - suite.destinationPluginTestMigrate(ctx, t, newPlugin, logger, destSpec, tests.MigrateStrategyOverwrite, opts) + migrateMode := MigrateModeSafe + writeMode := WriteModeOverwrite + suite.destinationPluginTestMigrate(ctx, t, newPlugin, logger, migrateMode, writeMode, tests.MigrateStrategyOverwrite, opts) }) t.Run("TestMigrateOverwriteForce", func(t *testing.T) { @@ -235,10 +227,9 @@ func PluginTestSuiteRunner(t *testing.T, newPlugin NewPluginFunc, destSpec pbPlu if suite.tests.SkipMigrateOverwriteForce { t.Skip("skipping " + t.Name()) } - destSpec.WriteSpec.WriteMode = pbPlugin.WRITE_MODE_WRITE_MODE_OVERWRITE - destSpec.WriteSpec.MigrateMode = pbPlugin.WriteSpec_FORCE - destSpec.Name = "test_migrate_overwrite_force" - suite.destinationPluginTestMigrate(ctx, t, newPlugin, logger, destSpec, tests.MigrateStrategyOverwrite, opts) + migrateMode := MigrateModeForced + writeMode := WriteModeOverwrite + suite.destinationPluginTestMigrate(ctx, t, newPlugin, logger, migrateMode, writeMode, tests.MigrateStrategyOverwrite, opts) }) t.Run("TestWriteAppend", func(t *testing.T) { @@ -246,9 +237,10 @@ func PluginTestSuiteRunner(t *testing.T, newPlugin NewPluginFunc, destSpec pbPlu if suite.tests.SkipAppend { t.Skip("skipping " + t.Name()) } - destSpec.Name = "test_write_append" + migrateMode := MigrateModeSafe + writeMode := WriteModeOverwrite p := newPlugin() - if err := suite.destinationPluginTestWriteAppend(ctx, p, logger, destSpec, opts); err != nil { + if err := suite.destinationPluginTestWriteAppend(ctx, p, logger, migrateMode, writeMode, opts); err != nil { t.Fatal(err) } if err := p.Close(ctx); err != nil { @@ -261,10 +253,9 @@ func PluginTestSuiteRunner(t *testing.T, newPlugin NewPluginFunc, destSpec pbPlu if suite.tests.SkipMigrateAppend { t.Skip("skipping " + t.Name()) } - destSpec.WriteSpec.WriteMode = pbPlugin.WRITE_MODE_WRITE_MODE_APPEND - destSpec.WriteSpec.MigrateMode = pbPlugin.WriteSpec_SAFE - destSpec.Name = "test_migrate_append" - suite.destinationPluginTestMigrate(ctx, t, newPlugin, logger, destSpec, tests.MigrateStrategyAppend, opts) + migrateMode := MigrateModeSafe + writeMode := WriteModeAppend + suite.destinationPluginTestMigrate(ctx, t, newPlugin, logger, migrateMode, writeMode, tests.MigrateStrategyAppend, opts) }) t.Run("TestMigrateAppendForce", func(t *testing.T) { @@ -272,10 +263,9 @@ func PluginTestSuiteRunner(t *testing.T, newPlugin NewPluginFunc, destSpec pbPlu if suite.tests.SkipMigrateAppendForce { t.Skip("skipping " + t.Name()) } - destSpec.WriteSpec.WriteMode = pbPlugin.WRITE_MODE_WRITE_MODE_APPEND - destSpec.WriteSpec.MigrateMode = pbPlugin.WriteSpec_FORCE - destSpec.Name = "test_migrate_append_force" - suite.destinationPluginTestMigrate(ctx, t, newPlugin, logger, destSpec, tests.MigrateStrategyAppend, opts) + migrateMode := MigrateModeForced + writeMode := WriteModeAppend + suite.destinationPluginTestMigrate(ctx, t, newPlugin, logger, migrateMode, writeMode, tests.MigrateStrategyAppend, opts) }) } diff --git a/plugin/testing_write_append.go b/plugin/testing_write_append.go index 4720431062..68efc4ca8b 100644 --- a/plugin/testing_write_append.go +++ b/plugin/testing_write_append.go @@ -6,31 +6,26 @@ import ( "time" "github.com/apache/arrow/go/v13/arrow/array" - pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/google/uuid" "github.com/rs/zerolog" ) -func (s *PluginTestSuite) destinationPluginTestWriteAppend(ctx context.Context, p *Plugin, logger zerolog.Logger, spec pbPlugin.Spec, testOpts PluginTestSuiteRunnerOptions) error { - spec.WriteSpec.WriteMode = pbPlugin.WRITE_MODE_WRITE_MODE_APPEND - if err := p.Init(ctx, spec); err != nil { +func (s *PluginTestSuite) destinationPluginTestWriteAppend(ctx context.Context, p *Plugin, logger zerolog.Logger, migrateMode MigrateMode, writeMode WriteMode, testOpts PluginTestSuiteRunnerOptions) error { + if err := p.Init(ctx, nil); err != nil { return fmt.Errorf("failed to init plugin: %w", err) } - tableName := fmt.Sprintf("cq_%s_%d", spec.Name, time.Now().Unix()) + tableName := fmt.Sprintf("cq_write_append_%d", time.Now().Unix()) table := schema.TestTable(tableName, testOpts.TestSourceOptions) syncTime := time.Now().UTC().Round(1 * time.Second) tables := schema.Tables{ table, } - if err := p.Migrate(ctx, tables); err != nil { + if err := p.Migrate(ctx, tables, migrateMode); err != nil { return fmt.Errorf("failed to migrate tables: %w", err) } sourceName := "testAppendSource" + uuid.NewString() - specSource := pbPlugin.Spec{ - Name: sourceName, - } opts := schema.GenTestDataOptions{ SourceName: sourceName, @@ -39,7 +34,7 @@ func (s *PluginTestSuite) destinationPluginTestWriteAppend(ctx context.Context, TimePrecision: testOpts.TimePrecision, } record1 := schema.GenTestData(table, opts) - if err := p.writeAll(ctx, specSource, syncTime, record1); err != nil { + if err := p.writeAll(ctx, sourceName, syncTime, writeMode, record1); err != nil { return fmt.Errorf("failed to write record first time: %w", err) } @@ -50,7 +45,7 @@ func (s *PluginTestSuite) destinationPluginTestWriteAppend(ctx context.Context, if !s.tests.SkipSecondAppend { // write second time - if err := p.writeAll(ctx, specSource, secondSyncTime, record2); err != nil { + if err := p.writeAll(ctx, sourceName, secondSyncTime, writeMode, record2); err != nil { return fmt.Errorf("failed to write one second time: %w", err) } } diff --git a/plugin/testing_write_migrate.go b/plugin/testing_write_migrate.go index 9ac2021866..d59da7fc8b 100644 --- a/plugin/testing_write_migrate.go +++ b/plugin/testing_write_migrate.go @@ -9,7 +9,6 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/array" - pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/cloudquery/plugin-sdk/v4/types" "github.com/google/uuid" @@ -21,19 +20,16 @@ func tableUUIDSuffix() string { return strings.ReplaceAll(uuid.NewString(), "-", "_") } -func testMigration(ctx context.Context, _ *testing.T, p *Plugin, logger zerolog.Logger, spec pbPlugin.Spec, target *schema.Table, source *schema.Table, mode pbPlugin.WriteSpec_MIGRATE_MODE, testOpts PluginTestSuiteRunnerOptions) error { - if err := p.Init(ctx, spec); err != nil { +func testMigration(ctx context.Context, _ *testing.T, p *Plugin, logger zerolog.Logger, target *schema.Table, source *schema.Table, mode MigrateMode, writeMode WriteMode, testOpts PluginTestSuiteRunnerOptions) error { + if err := p.Init(ctx, nil); err != nil { return fmt.Errorf("failed to init plugin: %w", err) } - if err := p.Migrate(ctx, schema.Tables{source}); err != nil { + if err := p.Migrate(ctx, schema.Tables{source}, mode); err != nil { return fmt.Errorf("failed to migrate tables: %w", err) } sourceName := target.Name - sourceSpec := pbPlugin.Spec{ - Name: sourceName, - } syncTime := time.Now().UTC().Round(1 * time.Second) opts := schema.GenTestDataOptions{ SourceName: sourceName, @@ -42,16 +38,16 @@ func testMigration(ctx context.Context, _ *testing.T, p *Plugin, logger zerolog. TimePrecision: testOpts.TimePrecision, } resource1 := schema.GenTestData(source, opts)[0] - if err := p.writeOne(ctx, sourceSpec, syncTime, resource1); err != nil { + if err := p.writeOne(ctx, sourceName, syncTime, writeMode, resource1); err != nil { return fmt.Errorf("failed to write one: %w", err) } - if err := p.Migrate(ctx, schema.Tables{target}); err != nil { + if err := p.Migrate(ctx, schema.Tables{target}, mode); err != nil { return fmt.Errorf("failed to migrate existing table: %w", err) } opts.SyncTime = syncTime.Add(time.Second).UTC() resource2 := schema.GenTestData(target, opts) - if err := p.writeAll(ctx, sourceSpec, syncTime, resource2); err != nil { + if err := p.writeAll(ctx, sourceName, syncTime, writeMode, resource2); err != nil { return fmt.Errorf("failed to write one after migration: %w", err) } @@ -65,7 +61,7 @@ func testMigration(ctx context.Context, _ *testing.T, p *Plugin, logger zerolog. return fmt.Errorf("failed to read all: %w", err) } sortRecordsBySyncTime(target, resourcesRead) - if mode == pbPlugin.WriteSpec_SAFE { + if mode == MigrateModeSafe { if len(resourcesRead) != 2 { return fmt.Errorf("expected 2 resources after write, got %d", len(resourcesRead)) } @@ -91,14 +87,13 @@ func (*PluginTestSuite) destinationPluginTestMigrate( t *testing.T, newPlugin NewPluginFunc, logger zerolog.Logger, - spec pbPlugin.Spec, + migrateMode MigrateMode, + writeMode WriteMode, strategy MigrateStrategy, testOpts PluginTestSuiteRunnerOptions, ) { - spec.WriteSpec.BatchSize = 1 - t.Run("add_column", func(t *testing.T) { - if strategy.AddColumn == pbPlugin.WriteSpec_FORCE && spec.WriteSpec.MigrateMode == pbPlugin.WriteSpec_SAFE { + if strategy.AddColumn == MigrateModeForced && migrateMode == MigrateModeSafe { t.Skip("skipping as migrate mode is safe") return } @@ -125,7 +120,7 @@ func (*PluginTestSuite) destinationPluginTestMigrate( } p := newPlugin() - if err := testMigration(ctx, t, p, logger, spec, target, source, strategy.AddColumn, testOpts); err != nil { + if err := testMigration(ctx, t, p, logger, target, source, strategy.AddColumn, writeMode, testOpts); err != nil { t.Fatalf("failed to migrate %s: %v", tableName, err) } if err := p.Close(ctx); err != nil { @@ -134,7 +129,7 @@ func (*PluginTestSuite) destinationPluginTestMigrate( }) t.Run("add_column_not_null", func(t *testing.T) { - if strategy.AddColumnNotNull == pbPlugin.WriteSpec_FORCE && spec.WriteSpec.MigrateMode == pbPlugin.WriteSpec_SAFE { + if strategy.AddColumnNotNull == MigrateModeForced && migrateMode == MigrateModeSafe { t.Skip("skipping as migrate mode is safe") return } @@ -159,7 +154,7 @@ func (*PluginTestSuite) destinationPluginTestMigrate( {Name: "bool", Type: arrow.FixedWidthTypes.Boolean, NotNull: true}, }} p := newPlugin() - if err := testMigration(ctx, t, p, logger, spec, target, source, strategy.AddColumnNotNull, testOpts); err != nil { + if err := testMigration(ctx, t, p, logger, target, source, strategy.AddColumnNotNull, writeMode, testOpts); err != nil { t.Fatalf("failed to migrate add_column_not_null: %v", err) } if err := p.Close(ctx); err != nil { @@ -168,7 +163,7 @@ func (*PluginTestSuite) destinationPluginTestMigrate( }) t.Run("remove_column", func(t *testing.T) { - if strategy.RemoveColumn == pbPlugin.WriteSpec_FORCE && spec.WriteSpec.MigrateMode == pbPlugin.WriteSpec_SAFE { + if strategy.RemoveColumn == MigrateModeForced && migrateMode == MigrateModeSafe { t.Skip("skipping as migrate mode is safe") return } @@ -192,7 +187,7 @@ func (*PluginTestSuite) destinationPluginTestMigrate( }} p := newPlugin() - if err := testMigration(ctx, t, p, logger, spec, target, source, strategy.RemoveColumn, testOpts); err != nil { + if err := testMigration(ctx, t, p, logger, target, source, strategy.RemoveColumn, writeMode, testOpts); err != nil { t.Fatalf("failed to migrate remove_column: %v", err) } if err := p.Close(ctx); err != nil { @@ -201,7 +196,7 @@ func (*PluginTestSuite) destinationPluginTestMigrate( }) t.Run("remove_column_not_null", func(t *testing.T) { - if strategy.RemoveColumnNotNull == pbPlugin.WriteSpec_FORCE && spec.WriteSpec.MigrateMode == pbPlugin.WriteSpec_SAFE { + if strategy.RemoveColumnNotNull == MigrateModeForced && migrateMode == MigrateModeSafe { t.Skip("skipping as migrate mode is safe") return } @@ -226,7 +221,7 @@ func (*PluginTestSuite) destinationPluginTestMigrate( }} p := newPlugin() - if err := testMigration(ctx, t, p, logger, spec, target, source, strategy.RemoveColumnNotNull, testOpts); err != nil { + if err := testMigration(ctx, t, p, logger, target, source, strategy.RemoveColumnNotNull, writeMode, testOpts); err != nil { t.Fatalf("failed to migrate remove_column_not_null: %v", err) } if err := p.Close(ctx); err != nil { @@ -235,7 +230,7 @@ func (*PluginTestSuite) destinationPluginTestMigrate( }) t.Run("change_column", func(t *testing.T) { - if strategy.ChangeColumn == pbPlugin.WriteSpec_FORCE && spec.WriteSpec.MigrateMode == pbPlugin.WriteSpec_SAFE { + if strategy.ChangeColumn == MigrateModeForced && migrateMode == MigrateModeSafe { t.Skip("skipping as migrate mode is safe") return } @@ -260,7 +255,7 @@ func (*PluginTestSuite) destinationPluginTestMigrate( }} p := newPlugin() - if err := testMigration(ctx, t, p, logger, spec, target, source, strategy.ChangeColumn, testOpts); err != nil { + if err := testMigration(ctx, t, p, logger, target, source, strategy.ChangeColumn, writeMode, testOpts); err != nil { t.Fatalf("failed to migrate change_column: %v", err) } if err := p.Close(ctx); err != nil { @@ -273,12 +268,10 @@ func (*PluginTestSuite) destinationPluginTestMigrate( table := schema.TestTable(tableName, testOpts.TestSourceOptions) p := newPlugin() - require.NoError(t, p.Init(ctx, spec)) - require.NoError(t, p.Migrate(ctx, schema.Tables{table})) + require.NoError(t, p.Init(ctx, nil)) + require.NoError(t, p.Migrate(ctx, schema.Tables{table}, MigrateModeSafe)) - nonForced := spec - nonForced.WriteSpec.MigrateMode = pbPlugin.WriteSpec_SAFE - require.NoError(t, p.Init(ctx, nonForced)) - require.NoError(t, p.Migrate(ctx, schema.Tables{table})) + require.NoError(t, p.Init(ctx, MigrateModeSafe)) + require.NoError(t, p.Migrate(ctx, schema.Tables{table}, MigrateModeSafe)) }) } diff --git a/plugin/testing_write_overwrite.go b/plugin/testing_write_overwrite.go index 12c8400053..34e89e8b2d 100644 --- a/plugin/testing_write_overwrite.go +++ b/plugin/testing_write_overwrite.go @@ -6,32 +6,27 @@ import ( "time" "github.com/apache/arrow/go/v13/arrow/array" - pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/cloudquery/plugin-sdk/v4/types" "github.com/google/uuid" "github.com/rs/zerolog" ) -func (*PluginTestSuite) destinationPluginTestWriteOverwrite(ctx context.Context, p *Plugin, logger zerolog.Logger, spec pbPlugin.Spec, testOpts PluginTestSuiteRunnerOptions) error { - spec.WriteSpec.WriteMode = pbPlugin.WRITE_MODE_WRITE_MODE_OVERWRITE +func (*PluginTestSuite) destinationPluginTestWriteOverwrite(ctx context.Context, p *Plugin, logger zerolog.Logger, spec any, testOpts PluginTestSuiteRunnerOptions) error { if err := p.Init(ctx, spec); err != nil { return fmt.Errorf("failed to init plugin: %w", err) } - tableName := fmt.Sprintf("cq_%s_%d", spec.Name, time.Now().Unix()) + tableName := fmt.Sprintf("cq_test_write_overwrite_%d", time.Now().Unix()) table := schema.TestTable(tableName, testOpts.TestSourceOptions) syncTime := time.Now().UTC().Round(1 * time.Second) tables := schema.Tables{ table, } - if err := p.Migrate(ctx, tables); err != nil { + if err := p.Migrate(ctx, tables, MigrateModeSafe); err != nil { return fmt.Errorf("failed to migrate tables: %w", err) } sourceName := "testOverwriteSource" + uuid.NewString() - sourceSpec := pbPlugin.Spec{ - Name: sourceName, - } opts := schema.GenTestDataOptions{ SourceName: sourceName, @@ -40,7 +35,7 @@ func (*PluginTestSuite) destinationPluginTestWriteOverwrite(ctx context.Context, TimePrecision: testOpts.TimePrecision, } resources := schema.GenTestData(table, opts) - if err := p.writeAll(ctx, sourceSpec, syncTime, resources); err != nil { + if err := p.writeAll(ctx, sourceName, syncTime, WriteModeOverwrite, resources); err != nil { return fmt.Errorf("failed to write all: %w", err) } sortRecordsBySyncTime(table, resources) @@ -82,7 +77,7 @@ func (*PluginTestSuite) destinationPluginTestWriteOverwrite(ctx context.Context, } updatedResource := schema.GenTestData(table, opts) // write second time - if err := p.writeAll(ctx, sourceSpec, secondSyncTime, updatedResource); err != nil { + if err := p.writeAll(ctx, sourceName, secondSyncTime, WriteModeOverwrite, updatedResource); err != nil { return fmt.Errorf("failed to write one second time: %w", err) } diff --git a/serve/plugin.go b/serve/plugin.go index 0adddb4b9c..f4dbfe9e5b 100644 --- a/serve/plugin.go +++ b/serve/plugin.go @@ -33,19 +33,19 @@ import ( "google.golang.org/grpc/test/bufconn" ) -type pluginServe struct { +type PluginServe struct { plugin *plugin.Plugin - args []string + args []string destinationV0V1Server bool sentryDSN string - testListener bool - testListenerConn *bufconn.Listener + testListener bool + testListenerConn *bufconn.Listener } -type PluginOption func(*pluginServe) +type PluginOption func(*PluginServe) func WithPluginSentryDSN(dsn string) PluginOption { - return func(s *pluginServe) { + return func(s *PluginServe) { s.sentryDSN = dsn } } @@ -53,14 +53,14 @@ func WithPluginSentryDSN(dsn string) PluginOption { // WithDestinationV0V1Server is used to include destination v0 and v1 server to work // with older sources func WithDestinationV0V1Server() PluginOption { - return func(s *pluginServe) { + return func(s *PluginServe) { s.destinationV0V1Server = true } } // WithArgs used to serve the plugin with predefined args instead of os.Args func WithArgs(args ...string) PluginOption { - return func(s *pluginServe) { + return func(s *PluginServe) { s.args = args } } @@ -68,7 +68,7 @@ func WithArgs(args ...string) PluginOption { // WithTestListener means that the plugin will be served with an in-memory listener // available via testListener() method instead of a network listener. func WithTestListener() PluginOption { - return func(s *pluginServe) { + return func(s *PluginServe) { s.testListener = true s.testListenerConn = bufconn.Listen(testBufSize) } @@ -76,9 +76,9 @@ func WithTestListener() PluginOption { const servePluginShort = `Start plugin server` -func Plugin(plugin *plugin.Plugin, opts ...PluginOption) *pluginServe{ - s := &pluginServe{ - plugin: plugin, +func Plugin(p *plugin.Plugin, opts ...PluginOption) *PluginServe { + s := &PluginServe{ + plugin: p, } for _, opt := range opts { opt(s) @@ -86,12 +86,14 @@ func Plugin(plugin *plugin.Plugin, opts ...PluginOption) *pluginServe{ return s } -func (s *pluginServe) bufPluginDialer(context.Context, string) (net.Conn, error) { +func (s *PluginServe) bufPluginDialer(context.Context, string) (net.Conn, error) { return s.testListenerConn.Dial() } -func (s *pluginServe) Serve(ctx context.Context) error { - types.RegisterAllExtensions() +func (s *PluginServe) Serve(ctx context.Context) error { + if err := types.RegisterAllExtensions(); err != nil { + return err + } cmd := s.newCmdPluginRoot() if s.args != nil { cmd.SetArgs(s.args) @@ -99,7 +101,7 @@ func (s *pluginServe) Serve(ctx context.Context) error { return cmd.ExecuteContext(ctx) } -func (serve *pluginServe) newCmdPluginServe() *cobra.Command { +func (s *PluginServe) newCmdPluginServe() *cobra.Command { var address string var network string var noSentry bool @@ -131,8 +133,8 @@ func (serve *pluginServe) newCmdPluginServe() *cobra.Command { // opts.Plugin.Logger = logger var listener net.Listener - if serve.testListener { - listener = serve.testListenerConn + if s.testListener { + listener = s.testListenerConn } else { listener, err = net.Listen(network, address) if err != nil { @@ -143,7 +145,7 @@ func (serve *pluginServe) newCmdPluginServe() *cobra.Command { // unlike destination plugins that can accept multiple connections limitListener := netutil.LimitListener(listener, 1) // See logging pattern https://github.com/grpc-ecosystem/go-grpc-middleware/blob/v2/providers/zerolog/examples_test.go - s := grpc.NewServer( + grpcServer := grpc.NewServer( grpc.ChainUnaryInterceptor( logging.UnaryServerInterceptor(grpczerolog.InterceptorLogger(logger)), ), @@ -153,30 +155,30 @@ func (serve *pluginServe) newCmdPluginServe() *cobra.Command { grpc.MaxRecvMsgSize(MaxMsgSize), grpc.MaxSendMsgSize(MaxMsgSize), ) - serve.plugin.SetLogger(logger) - pbv3.RegisterPluginServer(s, &serversv3.Server{ - Plugin: serve.plugin, + s.plugin.SetLogger(logger) + pbv3.RegisterPluginServer(grpcServer, &serversv3.Server{ + Plugin: s.plugin, Logger: logger, }) - if serve.destinationV0V1Server { - pbDestinationV1.RegisterDestinationServer(s, &serverDestinationV1.Server{ - Plugin: serve.plugin, + if s.destinationV0V1Server { + pbDestinationV1.RegisterDestinationServer(grpcServer, &serverDestinationV1.Server{ + Plugin: s.plugin, Logger: logger, }) - pbDestinationV0.RegisterDestinationServer(s, &serverDestinationV0.Server{ - Plugin: serve.plugin, + pbDestinationV0.RegisterDestinationServer(grpcServer, &serverDestinationV0.Server{ + Plugin: s.plugin, Logger: logger, }) } - pbdiscoveryv0.RegisterDiscoveryServer(s, &discoveryServerV0.Server{ + pbdiscoveryv0.RegisterDiscoveryServer(grpcServer, &discoveryServerV0.Server{ Versions: []string{"v0", "v1", "v2", "v3"}, }) - version := serve.plugin.Version() + version := s.plugin.Version() - if serve.sentryDSN != "" && !strings.EqualFold(version, "development") && !noSentry { + if s.sentryDSN != "" && !strings.EqualFold(version, "development") && !noSentry { err = sentry.Init(sentry.ClientOptions{ - Dsn: serve.sentryDSN, + Dsn: s.sentryDSN, Debug: false, AttachStacktrace: false, Release: version, @@ -210,15 +212,15 @@ func (serve *pluginServe) newCmdPluginServe() *cobra.Command { select { case sig := <-c: logger.Info().Str("address", listener.Addr().String()).Str("signal", sig.String()).Msg("Got stop signal. Source plugin server shutting down") - s.Stop() + grpcServer.Stop() case <-ctx.Done(): logger.Info().Str("address", listener.Addr().String()).Msg("Context cancelled. Source plugin server shutting down") - s.Stop() + grpcServer.Stop() } }() logger.Info().Str("address", listener.Addr().String()).Msg("Source plugin server listening") - if err := s.Serve(limitListener); err != nil { + if err := grpcServer.Serve(limitListener); err != nil { return fmt.Errorf("failed to serve: %w", err) } return nil @@ -251,7 +253,7 @@ doc --format json . ` ) -func (serve *pluginServe) newCmdPluginDoc() *cobra.Command { +func (s *PluginServe) newCmdPluginDoc() *cobra.Command { format := newEnum([]string{"json", "markdown"}, "markdown") cmd := &cobra.Command{ Use: "doc ", @@ -260,20 +262,20 @@ func (serve *pluginServe) newCmdPluginDoc() *cobra.Command { Args: cobra.ExactArgs(1), RunE: func(cmd *cobra.Command, args []string) error { pbFormat := pbv3.GenDocs_FORMAT(pbv3.GenDocs_FORMAT_value[format.Value]) - return serve.plugin.GeneratePluginDocs(args[0], pbFormat) + return s.plugin.GeneratePluginDocs(args[0], pbFormat) }, } cmd.Flags().Var(format, "format", fmt.Sprintf("output format. one of: %s", strings.Join(format.Allowed, ","))) return cmd } -func (serve *pluginServe) newCmdPluginRoot() *cobra.Command { +func (s *PluginServe) newCmdPluginRoot() *cobra.Command { cmd := &cobra.Command{ - Use: fmt.Sprintf("%s ", serve.plugin.Name()), + Use: fmt.Sprintf("%s ", s.plugin.Name()), } - cmd.AddCommand(serve.newCmdPluginServe()) - cmd.AddCommand(serve.newCmdPluginDoc()) + cmd.AddCommand(s.newCmdPluginServe()) + cmd.AddCommand(s.newCmdPluginDoc()) cmd.CompletionOptions.DisableDefaultCmd = true - cmd.Version = serve.plugin.Version() + cmd.Version = s.plugin.Version() return cmd } diff --git a/serve/plugin_test.go b/serve/plugin_test.go index 16f719abc8..9135012f0c 100644 --- a/serve/plugin_test.go +++ b/serve/plugin_test.go @@ -58,11 +58,11 @@ func (*testExecutionClient) Close(ctx context.Context) error { return nil } -func newTestExecutionClient(context.Context, zerolog.Logger, pb.Spec) (plugin.Client, error) { +func newTestExecutionClient(context.Context, zerolog.Logger, any) (plugin.Client, error) { return &testExecutionClient{}, nil } -func TestSourceSuccess(t *testing.T) { +func TestPlugin(t *testing.T) { p := plugin.NewPlugin( "testPlugin", "v1.0.0", @@ -106,16 +106,6 @@ func TestSourceSuccess(t *testing.T) { t.Fatalf("Expected version to be v1.0.0 but got %s", getVersionResponse.Version) } - spec := pb.Spec{ - Name: "testSourcePlugin", - Version: "v1.0.0", - Path: "cloudquery/testSourcePlugin", - SyncSpec: &pb.SyncSpec{ - Tables: []string{"test_table"}, - Destinations: []string{"test"}, - }, - } - getTablesRes, err := c.GetStaticTables(ctx, &pb.GetStaticTables_Request{}) if err != nil { t.Fatal(err) @@ -129,7 +119,7 @@ func TestSourceSuccess(t *testing.T) { if len(tables) != 2 { t.Fatalf("Expected 2 tables but got %d", len(tables)) } - if _, err := c.Init(ctx, &pb.Init_Request{Spec: &spec}); err != nil { + if _, err := c.Init(ctx, &pb.Init_Request{}); err != nil { t.Fatal(err) } From 3d2b0f41788293f1291a71eebea2c27e7716b7a9 Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Fri, 2 Jun 2023 18:08:33 +0300 Subject: [PATCH 057/125] wip --- go.mod | 14 +- go.sum | 38 ++++- internal/plugins/local/local.go | 154 ++++++++++++++++++ .../{backends => plugins}/local/local_test.go | 2 +- internal/state/state.go | 27 +++ plugin/plugin_managed_source_test.go | 32 ++-- serve/destination_v0_test.go | 2 +- serve/destination_v1_test.go | 2 +- 8 files changed, 241 insertions(+), 30 deletions(-) create mode 100644 internal/plugins/local/local.go rename internal/{backends => plugins}/local/local_test.go (98%) create mode 100644 internal/state/state.go diff --git a/go.mod b/go.mod index 5adf4d1599..6210df53c0 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,7 @@ module github.com/cloudquery/plugin-sdk/v4 go 1.19 require ( - github.com/apache/arrow/go/v13 v13.0.0-20230525142029-2d32efeedad8 + github.com/apache/arrow/go/v13 v13.0.0-20230601214540-018e7d3f9c4b github.com/bradleyjkemp/cupaloy/v2 v2.8.0 github.com/cloudquery/plugin-pb-go v1.2.0 github.com/cloudquery/plugin-sdk/v2 v2.7.0 @@ -14,7 +14,6 @@ require ( github.com/grpc-ecosystem/go-grpc-middleware/providers/zerolog/v2 v2.0.0-rc.3 github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.0.0-rc.3 github.com/rs/zerolog v1.29.1 - github.com/spf13/cast v1.5.0 github.com/spf13/cobra v1.6.1 github.com/stretchr/testify v1.8.4 github.com/thoas/go-funk v0.9.3 @@ -33,7 +32,7 @@ replace github.com/cloudquery/plugin-pb-go => ../plugin-pb-go require ( github.com/andybalholm/brotli v1.0.5 // indirect github.com/apache/thrift v0.16.0 // indirect - github.com/cloudquery/plugin-sdk/v3 v3.7.0 // indirect + github.com/avast/retry-go/v4 v4.3.4 // indirect github.com/davecgh/go-spew v1.1.1 // indirect github.com/ghodss/yaml v1.0.0 // indirect github.com/golang/protobuf v1.5.3 // indirect @@ -43,12 +42,19 @@ require ( github.com/klauspost/cpuid/v2 v2.2.3 // indirect github.com/mattn/go-colorable v0.1.13 // indirect github.com/mattn/go-isatty v0.0.18 // indirect - github.com/pierrec/lz4/v4 v4.1.17 // indirect + github.com/mattn/go-runewidth v0.0.14 // indirect + github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 // indirect + github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3 // indirect + github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect + github.com/pierrec/lz4/v4 v4.1.15 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect + github.com/rivo/uniseg v0.2.0 // indirect + github.com/schollz/progressbar/v3 v3.13.1 // indirect github.com/spf13/pflag v1.0.5 // indirect github.com/zeebo/xxh3 v1.0.2 // indirect golang.org/x/mod v0.8.0 // indirect golang.org/x/sys v0.7.0 // indirect + golang.org/x/term v0.7.0 // indirect golang.org/x/tools v0.6.0 // indirect golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20230530153820-e85fd2cbaebc // indirect diff --git a/go.sum b/go.sum index 17a7a98de3..0ebdc98a2f 100644 --- a/go.sum +++ b/go.sum @@ -33,6 +33,13 @@ cloud.google.com/go/storage v1.10.0/go.mod h1:FLPqc6j+Ki4BU591ie1oL6qBQGu2Bl/tZ9 dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= +github.com/JohnCGriffin/overflow v0.0.0-20211019200055-46fa312c352c h1:RGWPOewvKIROun94nF7v2cua9qP+thov/7M50KEoeSU= +github.com/andybalholm/brotli v1.0.5 h1:8uQZIdzKmjc/iuPu7O2ioW48L81FgatrcpfFmiq/cCs= +github.com/andybalholm/brotli v1.0.5/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig= +github.com/apache/thrift v0.16.0 h1:qEy6UW60iVOlUy+b9ZR0d5WzUWYGOo4HfopoyBaNmoY= +github.com/apache/thrift v0.16.0/go.mod h1:PHK3hniurgQaNMZYaCLEqXKsYK8upmhPbmdP2FXSqgU= +github.com/avast/retry-go/v4 v4.3.4 h1:pHLkL7jvCvP317I8Ge+Km2Yhntv3SdkJm7uekkqbKhM= +github.com/avast/retry-go/v4 v4.3.4/go.mod h1:rv+Nla6Vk3/ilU0H51VHddWHiwimzX66yZ0JT6T+UvE= github.com/bradleyjkemp/cupaloy/v2 v2.8.0 h1:any4BmKE+jGIaMpnU8YgH/I2LPiLBufr6oMMlVBbn9M= github.com/bradleyjkemp/cupaloy/v2 v2.8.0/go.mod h1:bm7JXdkRd4BHJk9HpwqAI8BoAY1lps46Enkdqw6aRX0= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= @@ -44,8 +51,6 @@ github.com/cloudquery/arrow/go/v13 v13.0.0-20230509053643-898a79b1d3c8 h1:CmgLSE github.com/cloudquery/arrow/go/v13 v13.0.0-20230509053643-898a79b1d3c8/go.mod h1:/XatdE3kDIBqZKhZ7OBUHwP2jaASDFZHqF4puOWM8po= github.com/cloudquery/plugin-sdk/v2 v2.7.0 h1:hRXsdEiaOxJtsn/wZMFQC9/jPfU1MeMK3KF+gPGqm7U= github.com/cloudquery/plugin-sdk/v2 v2.7.0/go.mod h1:pAX6ojIW99b/Vg4CkhnsGkRIzNaVEceYMR+Bdit73ug= -github.com/cloudquery/plugin-sdk/v3 v3.7.0 h1:aRazh17V+6AA00vmxPZRv2rudNEerSd3kqbyffRl6SA= -github.com/cloudquery/plugin-sdk/v3 v3.7.0/go.mod h1:z9Fny7SO8fNyVx6bOTM037lo7h3vJI+ZHUc/RMj20VU= github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= github.com/cncf/udpa/go v0.0.0-20201120205902-5459f2c99403/go.mod h1:WmhPx2Nbnhtbo57+VJT5O0JRkEi1Wbu0z5j0R8u5Hbk= github.com/coreos/go-systemd v0.0.0-20190321100706-95778dfbb74e/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= @@ -59,7 +64,6 @@ github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.m github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= github.com/envoyproxy/go-control-plane v0.9.9-0.20210217033140-668b12f5399d/go.mod h1:cXg6YxExXjJnVBQHBLXeUAgxn2UodCpnH306RInaBQk= github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= -github.com/frankban/quicktest v1.14.3 h1:FJKSZTDHjyhriyC81FLQ0LY93eSai0ZyR/ZIkd3ZUKE= github.com/getsentry/sentry-go v0.20.0 h1:bwXW98iMRIWxn+4FgPW7vMrjmbym6HblXALmhjHmQaQ= github.com/getsentry/sentry-go v0.20.0/go.mod h1:lc76E2QywIyW8WuBnwl8Lc4bkmQH4+w1gwTf25trprY= github.com/ghodss/yaml v1.0.0 h1:wQHKEahhL6wmXdzwWG11gIVCkOv05bNOh+Rxn0yngAk= @@ -142,6 +146,7 @@ github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2 github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU= github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk= +github.com/k0kubun/go-ansi v0.0.0-20180517002512-3bf9e2903213/go.mod h1:vNUNkEQ1e29fT/6vq2aBdFsgNPmy8qMdSay1npru+Sw= github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/klauspost/compress v1.16.0 h1:iULayQNOReoYUe+1qtKOqw9CwJv3aNQu8ivo7lw1HU4= @@ -158,8 +163,20 @@ github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxec github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94= github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= +github.com/mattn/go-isatty v0.0.17/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= github.com/mattn/go-isatty v0.0.18 h1:DOKFKCQ7FNG2L1rbrmstDN4QVRdS89Nkh85u68Uwp98= github.com/mattn/go-isatty v0.0.18/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= +<<<<<<< HEAD +======= +github.com/mattn/go-runewidth v0.0.14 h1:+xnbZSEeDbOIg5/mE6JF0w6n9duR1l3/WmbinWVwUuU= +github.com/mattn/go-runewidth v0.0.14/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= +github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 h1:AMFGa4R4MiIpspGNG7Z948v4n35fFGB3RR3G/ry4FWs= +github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8/go.mod h1:mC1jAcsrzbxHt8iiaC+zU4b1ylILSosueou12R++wfY= +github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3 h1:+n/aFZefKZp7spd8DFdX7uMikMLXX4oubIzJF4kv/wI= +github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3/go.mod h1:RagcQ7I8IeTMnF8JTXieKnO4Z6JCsikNEzj0DwauVzE= +github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db h1:62I3jR2EmQ4l5rM/4FEfDWcRD+abF5XlKShorW5LRoQ= +github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db/go.mod h1:l0dey0ia/Uv7NcFFVbCLtqEBQbrT4OCwCSKTEv6enCw= +>>>>>>> 446b805 (wip) github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o= github.com/pierrec/lz4/v4 v4.1.17 h1:kV4Ip+/hUBC+8T6+2EgburRtkE9ef4nbY3f4dFhGjMc= github.com/pierrec/lz4/v4 v4.1.17/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= @@ -170,6 +187,8 @@ github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINE github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= +github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY= +github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8= github.com/rs/xid v1.2.1/go.mod h1:+uKXf+4Djp6Md1KODXJxgGQPKngRmWyn10oCKFzNHOQ= @@ -178,14 +197,20 @@ github.com/rs/zerolog v1.19.0/go.mod h1:IzD0RJ65iWH0w97OQQebJEvTZYvsCUm9WVLWBQrJ github.com/rs/zerolog v1.29.1 h1:cO+d60CHkknCbvzEWxP0S9K6KqyTjrCNUy1LdQLCGPc= github.com/rs/zerolog v1.29.1/go.mod h1:Le6ESbR7hc+DP6Lt1THiV8CQSdkkNrd3R0XbEgp3ZBU= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= -github.com/spf13/cast v1.5.0 h1:rj3WzYc11XZaIZMPKmwP96zkFEnnAmV8s6XbB2aY32w= -github.com/spf13/cast v1.5.0/go.mod h1:SpXXQ5YoyJw6s3/6cMTQuxvgRl3PCJiyaX9p6b155UU= +github.com/schollz/progressbar/v3 v3.13.1 h1:o8rySDYiQ59Mwzy2FELeHY5ZARXZTVJC7iHD6PEFUiE= +github.com/schollz/progressbar/v3 v3.13.1/go.mod h1:xvrbki8kfT1fzWzBT/UZd9L6GA+jdL7HAgq2RFnO6fQ= github.com/spf13/cobra v1.6.1 h1:o94oiPyS4KD1mPy2fmcYYHHfCxLqYjJOhGsCHFZtEzA= github.com/spf13/cobra v1.6.1/go.mod h1:IOw/AERYS7UzyrGinqmz6HLUo219MORXGxhbaJUqzrY= github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +<<<<<<< HEAD +======= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +>>>>>>> 446b805 (wip) github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= @@ -324,6 +349,9 @@ golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.7.0 h1:3jlCCIQZPdOYu1h8BkNvLz8Kgwtae2cagcG/VamtZRU= golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.6.0/go.mod h1:m6U89DPEgQRMq3DNkDClhWw02AUbt2daBVO4cn4Hv9U= +golang.org/x/term v0.7.0 h1:BEvjmm5fURWqcfbSKTdpkDXYBrUS1c0m8agp14W48vQ= +golang.org/x/term v0.7.0/go.mod h1:P32HKFT3hSsZrRxla30E9HqToFYAQPCMs/zFMBUFqPY= golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= diff --git a/internal/plugins/local/local.go b/internal/plugins/local/local.go new file mode 100644 index 0000000000..1780f67162 --- /dev/null +++ b/internal/plugins/local/local.go @@ -0,0 +1,154 @@ +package local + +import ( + "context" + "encoding/json" + "fmt" + "io" + "os" + "path" + "strings" + "sync" + + "github.com/rs/zerolog" +) + +type LocalSpec struct { + Path string `json:"path"` +} + +type Local struct { + sourceName string + spec *LocalSpec + tables map[string]entries // table -> key -> value + tablesLock sync.RWMutex +} + +type entries map[string]string + + +func New(_ context.Context, _ zerolog.Logger, spec any) (*Local, error) { + l := &Local{ + spec: spec.(*LocalSpec), + } + tables, err := l.loadPreviousState() + if err != nil { + return nil, err + } + if tables == nil { + tables = map[string]entries{} + } + l.tables = tables + return l, nil +} + +func (l *Local) loadPreviousState() (map[string]entries, error) { + files, err := os.ReadDir(l.spec.Path) + if os.IsNotExist(err) { + return nil, nil + } + var tables = map[string]entries{} + for _, f := range files { + if f.IsDir() || !f.Type().IsRegular() { + continue + } + name := f.Name() + if !strings.HasSuffix(name, ".json") || !strings.HasPrefix(name, l.sourceName+"-") { + continue + } + table, kv, err := l.readFile(name) + if err != nil { + return nil, err + } + tables[table] = kv + } + return tables, nil +} + +func (l *Local) readFile(name string) (table string, kv entries, err error) { + p := path.Join(l.spec.Path, name) + f, err := os.Open(p) + if err != nil { + return "", nil, fmt.Errorf("failed to open state file: %w", err) + } + b, err := io.ReadAll(f) + if err != nil { + return "", nil, fmt.Errorf("failed to read state file: %w", err) + } + err = f.Close() + if err != nil { + return "", nil, fmt.Errorf("failed to close state file: %w", err) + } + err = json.Unmarshal(b, &kv) + if err != nil { + return "", nil, fmt.Errorf("failed to unmarshal state file: %w", err) + } + table = strings.TrimPrefix(strings.TrimSuffix(name, ".json"), l.sourceName+"-") + return table, kv, nil +} + +func (l *Local) Get(_ context.Context, table, clientID string) (string, error) { + l.tablesLock.RLock() + defer l.tablesLock.RUnlock() + + if _, ok := l.tables[table]; !ok { + return "", nil + } + return l.tables[table][clientID], nil +} + +func (l *Local) Set(_ context.Context, table, clientID, value string) error { + l.tablesLock.Lock() + defer l.tablesLock.Unlock() + + if _, ok := l.tables[table]; !ok { + l.tables[table] = map[string]string{} + } + prev := l.tables[table][clientID] + l.tables[table][clientID] = value + if prev != value { + // only flush if the value changed + return l.flushTable(table, l.tables[table]) + } + return nil +} + +func (l *Local) Close(_ context.Context) error { + l.tablesLock.RLock() + defer l.tablesLock.RUnlock() + + return l.flush() +} + +func (l *Local) flush() error { + for table, kv := range l.tables { + err := l.flushTable(table, kv) + if err != nil { + return err + } + } + return nil +} + +func (l *Local) flushTable(table string, entries entries) error { + if len(entries) == 0 { + return nil + } + + err := os.MkdirAll(l.spec.Path, 0755) + if err != nil { + return fmt.Errorf("failed to create state directory %v: %w", l.spec.Path, err) + } + + b, err := json.MarshalIndent(entries, "", " ") + if err != nil { + return fmt.Errorf("failed to marshal state for table %v: %w", table, err) + } + f := path.Join(l.spec.Path, l.sourceName+"-"+table+".json") + err = os.WriteFile(f, b, 0644) + if err != nil { + return fmt.Errorf("failed to write state for table %v: %w", table, err) + } + + return nil +} diff --git a/internal/backends/local/local_test.go b/internal/plugins/local/local_test.go similarity index 98% rename from internal/backends/local/local_test.go rename to internal/plugins/local/local_test.go index 4e3423f9d8..58fb073ed3 100644 --- a/internal/backends/local/local_test.go +++ b/internal/plugins/local/local_test.go @@ -4,7 +4,7 @@ import ( "context" "testing" - "github.com/cloudquery/plugin-pb-go/specs" + "github.com/cloudquery/plugin-pb-go/specs/v0" ) func TestLocal(t *testing.T) { diff --git a/internal/state/state.go b/internal/state/state.go new file mode 100644 index 0000000000..1dc00b79f9 --- /dev/null +++ b/internal/state/state.go @@ -0,0 +1,27 @@ +package state + +import "context" + +type Client struct { + // managedPlugin managedplugin.Client +} + +// func NewState(ctx context.Context, managedPlugin managedplugin.Client) *Client { +// return &Client{ +// managedPlugin: managedPlugin, +// } +// c := pbPlugin.NewPluginClient(managedPlugin.Conn) +// c.Write(ctx, ) +// } + +func NewState(spec any) *Client { + return &Client{} +} + +func (* Client) SetKey(ctx context.Context, key string, value string) error { + return nil +} + +func (* Client) GetKey(ctx context.Context, key string) (string, error) { + return "", nil +} \ No newline at end of file diff --git a/plugin/plugin_managed_source_test.go b/plugin/plugin_managed_source_test.go index 3c55c7d4ae..8520c3a7be 100644 --- a/plugin/plugin_managed_source_test.go +++ b/plugin/plugin_managed_source_test.go @@ -9,6 +9,7 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/array" pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" + "github.com/cloudquery/plugin-sdk/v4/plugin" "github.com/cloudquery/plugin-sdk/v4/scalar" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/google/go-cmp/cmp" @@ -358,18 +359,18 @@ func (testRand) Read(p []byte) (n int, err error) { func TestManagedSync(t *testing.T) { uuid.SetRand(testRand{}) - for _, scheduler := range pbPlugin.SyncSpec_SCHEDULER_value { + for _, scheduler := range plugin.AllSchedulers { for _, tc := range syncTestCases { tc := tc tc.table = tc.table.Copy(nil) - t.Run(tc.table.Name+"_"+pbPlugin.SyncSpec_SCHEDULER(scheduler).String(), func(t *testing.T) { - testSyncTable(t, tc, pbPlugin.SyncSpec_SCHEDULER(scheduler), tc.deterministicCQID) + t.Run(tc.table.Name+"_"+scheduler.String(), func(t *testing.T) { + testSyncTable(t, tc, scheduler, tc.deterministicCQID) }) } } } -func testSyncTable(t *testing.T, tc syncTestCase, scheduler pbPlugin.SyncSpec_SCHEDULER, deterministicCQID bool) { +func testSyncTable(t *testing.T, tc syncTestCase, scheduler plugin.Scheduler, deterministicCQID bool) { ctx := context.Background() tables := []*schema.Table{ tc.table, @@ -382,23 +383,18 @@ func testSyncTable(t *testing.T, tc syncTestCase, scheduler pbPlugin.SyncSpec_SC WithStaticTables(tables), ) plugin.SetLogger(zerolog.New(zerolog.NewTestWriter(t))) - spec := pbPlugin.Spec{ - Name: "testSource", - Path: "cloudquery/testSource", - Version: "v1.0.0", - SyncSpec: &pbPlugin.SyncSpec{ - Tables: []string{"*"}, - Destinations: []string{"test"}, - Concurrency: 1, // choose a very low value to check that we don't run into deadlocks - Scheduler: scheduler, - DetrministicCqId: deterministicCQID, - }, - } - if err := plugin.Init(ctx, spec); err != nil { + sourceName := "testSource" + + if err := plugin.Init(ctx, nil); err != nil { t.Fatal(err) } - records, err := plugin.syncAll(ctx, testSyncTime, *spec.SyncSpec) + records, err := plugin.syncAll(ctx, sourceName, testSyncTime, SyncOptions{ + Tables: []string{"*"}, + Concurrency: 1, + Scheduler: scheduler, + DeterministicCQID: deterministicCQID, + }) if err != nil { t.Fatal(err) } diff --git a/serve/destination_v0_test.go b/serve/destination_v0_test.go index 6a206e53ad..a1ba81f794 100644 --- a/serve/destination_v0_test.go +++ b/serve/destination_v0_test.go @@ -12,7 +12,7 @@ import ( "github.com/apache/arrow/go/v13/arrow/memory" pbBase "github.com/cloudquery/plugin-pb-go/pb/base/v0" pb "github.com/cloudquery/plugin-pb-go/pb/destination/v0" - "github.com/cloudquery/plugin-pb-go/specs" + "github.com/cloudquery/plugin-pb-go/specs/v0" schemav2 "github.com/cloudquery/plugin-sdk/v2/schema" "github.com/cloudquery/plugin-sdk/v2/testdata" "github.com/cloudquery/plugin-sdk/v4/internal/deprecated" diff --git a/serve/destination_v1_test.go b/serve/destination_v1_test.go index 577fd8f9ff..c65320eea1 100644 --- a/serve/destination_v1_test.go +++ b/serve/destination_v1_test.go @@ -12,7 +12,7 @@ import ( "github.com/apache/arrow/go/v13/arrow/array" "github.com/apache/arrow/go/v13/arrow/ipc" pb "github.com/cloudquery/plugin-pb-go/pb/destination/v1" - "github.com/cloudquery/plugin-pb-go/specs" + "github.com/cloudquery/plugin-pb-go/specs/v0" "github.com/cloudquery/plugin-sdk/v4/plugin" "github.com/cloudquery/plugin-sdk/v4/schema" "google.golang.org/grpc" From ff8d458e6a17f8f889b2d068b9542d2c843b7637 Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Sun, 4 Jun 2023 11:34:00 +0300 Subject: [PATCH 058/125] more wip --- internal/plugins/local/local.go | 154 ------------------- internal/plugins/local/local_test.go | 101 ------------- internal/servers/discovery/v1/discovery.go | 16 ++ internal/servers/plugin/v3/plugin.go | 31 +++- internal/servers/plugin/v3/state.go | 165 +++++++++++++++++++++ internal/state/state.go | 27 ---- plugin/memdb.go | 6 +- plugin/options.go | 27 ++++ plugin/plugin.go | 16 +- plugin/plugin_managed_sync.go | 45 ++++++ plugin/plugin_reader.go | 41 ++--- plugin/plugin_writer.go | 3 + plugin/scheduler_dfs.go | 2 +- plugin/scheduler_round_robin.go | 2 +- plugin/state.go | 1 + serve/plugin.go | 6 + serve/plugin_test.go | 4 + serve/state_v3_test.go | 57 +++++++ state/state.go | 8 + 19 files changed, 393 insertions(+), 319 deletions(-) delete mode 100644 internal/plugins/local/local.go delete mode 100644 internal/plugins/local/local_test.go create mode 100644 internal/servers/discovery/v1/discovery.go create mode 100644 internal/servers/plugin/v3/state.go delete mode 100644 internal/state/state.go create mode 100644 plugin/plugin_managed_sync.go create mode 100644 plugin/state.go create mode 100644 serve/state_v3_test.go create mode 100644 state/state.go diff --git a/internal/plugins/local/local.go b/internal/plugins/local/local.go deleted file mode 100644 index 1780f67162..0000000000 --- a/internal/plugins/local/local.go +++ /dev/null @@ -1,154 +0,0 @@ -package local - -import ( - "context" - "encoding/json" - "fmt" - "io" - "os" - "path" - "strings" - "sync" - - "github.com/rs/zerolog" -) - -type LocalSpec struct { - Path string `json:"path"` -} - -type Local struct { - sourceName string - spec *LocalSpec - tables map[string]entries // table -> key -> value - tablesLock sync.RWMutex -} - -type entries map[string]string - - -func New(_ context.Context, _ zerolog.Logger, spec any) (*Local, error) { - l := &Local{ - spec: spec.(*LocalSpec), - } - tables, err := l.loadPreviousState() - if err != nil { - return nil, err - } - if tables == nil { - tables = map[string]entries{} - } - l.tables = tables - return l, nil -} - -func (l *Local) loadPreviousState() (map[string]entries, error) { - files, err := os.ReadDir(l.spec.Path) - if os.IsNotExist(err) { - return nil, nil - } - var tables = map[string]entries{} - for _, f := range files { - if f.IsDir() || !f.Type().IsRegular() { - continue - } - name := f.Name() - if !strings.HasSuffix(name, ".json") || !strings.HasPrefix(name, l.sourceName+"-") { - continue - } - table, kv, err := l.readFile(name) - if err != nil { - return nil, err - } - tables[table] = kv - } - return tables, nil -} - -func (l *Local) readFile(name string) (table string, kv entries, err error) { - p := path.Join(l.spec.Path, name) - f, err := os.Open(p) - if err != nil { - return "", nil, fmt.Errorf("failed to open state file: %w", err) - } - b, err := io.ReadAll(f) - if err != nil { - return "", nil, fmt.Errorf("failed to read state file: %w", err) - } - err = f.Close() - if err != nil { - return "", nil, fmt.Errorf("failed to close state file: %w", err) - } - err = json.Unmarshal(b, &kv) - if err != nil { - return "", nil, fmt.Errorf("failed to unmarshal state file: %w", err) - } - table = strings.TrimPrefix(strings.TrimSuffix(name, ".json"), l.sourceName+"-") - return table, kv, nil -} - -func (l *Local) Get(_ context.Context, table, clientID string) (string, error) { - l.tablesLock.RLock() - defer l.tablesLock.RUnlock() - - if _, ok := l.tables[table]; !ok { - return "", nil - } - return l.tables[table][clientID], nil -} - -func (l *Local) Set(_ context.Context, table, clientID, value string) error { - l.tablesLock.Lock() - defer l.tablesLock.Unlock() - - if _, ok := l.tables[table]; !ok { - l.tables[table] = map[string]string{} - } - prev := l.tables[table][clientID] - l.tables[table][clientID] = value - if prev != value { - // only flush if the value changed - return l.flushTable(table, l.tables[table]) - } - return nil -} - -func (l *Local) Close(_ context.Context) error { - l.tablesLock.RLock() - defer l.tablesLock.RUnlock() - - return l.flush() -} - -func (l *Local) flush() error { - for table, kv := range l.tables { - err := l.flushTable(table, kv) - if err != nil { - return err - } - } - return nil -} - -func (l *Local) flushTable(table string, entries entries) error { - if len(entries) == 0 { - return nil - } - - err := os.MkdirAll(l.spec.Path, 0755) - if err != nil { - return fmt.Errorf("failed to create state directory %v: %w", l.spec.Path, err) - } - - b, err := json.MarshalIndent(entries, "", " ") - if err != nil { - return fmt.Errorf("failed to marshal state for table %v: %w", table, err) - } - f := path.Join(l.spec.Path, l.sourceName+"-"+table+".json") - err = os.WriteFile(f, b, 0644) - if err != nil { - return fmt.Errorf("failed to write state for table %v: %w", table, err) - } - - return nil -} diff --git a/internal/plugins/local/local_test.go b/internal/plugins/local/local_test.go deleted file mode 100644 index 58fb073ed3..0000000000 --- a/internal/plugins/local/local_test.go +++ /dev/null @@ -1,101 +0,0 @@ -package local - -import ( - "context" - "testing" - - "github.com/cloudquery/plugin-pb-go/specs/v0" -) - -func TestLocal(t *testing.T) { - tmpDir := t.TempDir() - ctx := context.Background() - ss := specs.Source{ - Name: "test", - Version: "vtest", - Path: "test", - Backend: specs.BackendLocal, - BackendSpec: Spec{ - Path: tmpDir, - }, - } - local, err := New(ss) - if err != nil { - t.Fatalf("failed to create local backend: %v", err) - } - if local.spec.Path != tmpDir { - t.Fatalf("expected path to be %s, but got %s", tmpDir, local.spec.Path) - } - - tableName := "test_table" - clientID := "test_client" - got, err := local.Get(ctx, tableName, clientID) - if err != nil { - t.Fatalf("failed to get value: %v", err) - } - if got != "" { - t.Fatalf("expected empty value, but got %s", got) - } - - err = local.Set(ctx, tableName, clientID, "test_value") - if err != nil { - t.Fatalf("failed to set value: %v", err) - } - - got, err = local.Get(ctx, tableName, clientID) - if err != nil { - t.Fatalf("failed to get value after setting it: %v", err) - } - if got != "test_value" { - t.Fatalf("expected value to be test_value, but got %s", got) - } - - err = local.Close(ctx) - if err != nil { - t.Fatalf("failed to close local backend: %v", err) - } - - local, err = New(ss) - if err != nil { - t.Fatalf("failed to open local backend after closing it: %v", err) - } - - got, err = local.Get(ctx, tableName, clientID) - if err != nil { - t.Fatalf("failed to get value after closing and reopening local backend: %v", err) - } - if got != "test_value" { - t.Fatalf("expected value to be test_value, but got %s", got) - } - - got, err = local.Get(ctx, "some_other_table", clientID) - if err != nil { - t.Fatalf("failed to get value after closing and reopening local backend: %v", err) - } - if got != "" { - t.Fatalf("expected empty value for some_other_table -> test_key, but got %s", got) - } - err = local.Close(ctx) - if err != nil { - t.Fatalf("failed to close local backend the second time: %v", err) - } - - // check that state is namespaced by source name - ss.Name = "test2" - local2, err := New(ss) - if err != nil { - t.Fatalf("failed to create local backend for test2: %v", err) - } - - got, err = local2.Get(ctx, "test_table", clientID) - if err != nil { - t.Fatalf("failed to get value for local backend test2: %v", err) - } - if got != "" { - t.Fatalf("expected empty value for test2 -> test_table -> test_key, but got %s", got) - } - err = local2.Close(ctx) - if err != nil { - t.Fatalf("failed to close second local backend: %v", err) - } -} diff --git a/internal/servers/discovery/v1/discovery.go b/internal/servers/discovery/v1/discovery.go new file mode 100644 index 0000000000..47c0197ec7 --- /dev/null +++ b/internal/servers/discovery/v1/discovery.go @@ -0,0 +1,16 @@ +package discovery + +import ( + "context" + + pb "github.com/cloudquery/plugin-pb-go/pb/discovery/v1" +) + +type Server struct { + pb.UnimplementedDiscoveryServer + Versions []uint64 +} + +func (s *Server) GetVersions(context.Context, *pb.GetVersions_Request) (*pb.GetVersions_Response, error) { + return &pb.GetVersions_Response{Versions: s.Versions}, nil +} diff --git a/internal/servers/plugin/v3/plugin.go b/internal/servers/plugin/v3/plugin.go index bbeb968859..b50de13253 100644 --- a/internal/servers/plugin/v3/plugin.go +++ b/internal/servers/plugin/v3/plugin.go @@ -12,6 +12,7 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/ipc" + "github.com/cloudquery/plugin-pb-go/managedplugin" pb "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" "github.com/cloudquery/plugin-sdk/v4/plugin" "github.com/cloudquery/plugin-sdk/v4/schema" @@ -29,6 +30,8 @@ type Server struct { pb.UnimplementedPluginServer Plugin *plugin.Plugin Logger zerolog.Logger + Directory string + NoSentry bool } func (s *Server) GetStaticTables(context.Context, *pb.GetStaticTables_Request) (*pb.GetStaticTables_Response, error) { @@ -92,6 +95,29 @@ func (s *Server) Sync(req *pb.Sync_Request, stream pb.Plugin_SyncServer) error { sourceName := req.SourceName + if req.StateBackend != nil { + opts := []managedplugin.Option{ + managedplugin.WithLogger(s.Logger), + managedplugin.WithDirectory(s.Directory), + } + if s.NoSentry { + opts = append(opts, managedplugin.WithNoSentry()) + } + statePlugin, err := managedplugin.NewClient(ctx, managedplugin.Config{ + Path: req.StateBackend.Path, + Registry: managedplugin.Registry(req.StateBackend.Registry), + Version: req.StateBackend.Version, + }, opts...) + if err != nil { + return status.Errorf(codes.Internal, "failed to create state plugin: %v", err) + } + stateClient, err := newStateClient(ctx, statePlugin.Conn, *req.StateBackend) + if err != nil { + return status.Errorf(codes.Internal, "failed to create state client: %v", err) + } + syncOptions.StateBackend = stateClient + } + go func() { defer close(records) err := s.Plugin.Sync(ctx, sourceName, req.SyncTime.AsTime(), syncOptions, records) @@ -172,7 +198,6 @@ func (s *Server) Migrate(ctx context.Context, req *pb.Migrate_Request) (*pb.Migr case pb.MIGRATE_MODE_FORCE: migrateMode = plugin.MigrateModeForced } - // switch req. return &pb.Migrate_Response{}, s.Plugin.Migrate(ctx, tables, migrateMode) } @@ -317,3 +342,7 @@ func setCQIDAsPrimaryKeysForTables(tables schema.Tables) { setCQIDAsPrimaryKeysForTables(table.Relations) } } + +func (s *Server) Close(ctx context.Context, _ *pb.Close_Request) (*pb.Close_Response, error) { + return &pb.Close_Response{}, s.Plugin.Close(ctx) +} \ No newline at end of file diff --git a/internal/servers/plugin/v3/state.go b/internal/servers/plugin/v3/state.go new file mode 100644 index 0000000000..4b7a43c297 --- /dev/null +++ b/internal/servers/plugin/v3/state.go @@ -0,0 +1,165 @@ +package plugin + +import ( + "bytes" + "context" + "fmt" + "io" + + "github.com/apache/arrow/go/v13/arrow" + "github.com/apache/arrow/go/v13/arrow/array" + "github.com/apache/arrow/go/v13/arrow/ipc" + "github.com/apache/arrow/go/v13/arrow/memory" + pbDiscovery "github.com/cloudquery/plugin-pb-go/pb/discovery/v1" + pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" + "github.com/cloudquery/plugin-sdk/v4/schema" + "github.com/cloudquery/plugin-sdk/v4/state" + "golang.org/x/exp/slices" + "google.golang.org/grpc" +) + +const stateTablePrefix = "cq_state_" +const keyColumn = "key" +const valueColumn = "value" + +type ClientV3 struct { + client pbPlugin.PluginClient + encodedTables [][]byte + mem map[string]string + keys []string + values []string +} + +func newStateClient(ctx context.Context, conn *grpc.ClientConn, spec pbPlugin.StateBackendSpec) (state.Client, error) { + discoveryClient := pbDiscovery.NewDiscoveryClient(conn) + versions, err := discoveryClient.GetVersions(ctx, &pbDiscovery.GetVersions_Request{}) + if err != nil { + return nil, err + } + if !slices.Contains(versions.Versions, 3) { + return nil, fmt.Errorf("please upgrade your state backend plugin") + } + + c := &ClientV3{ + client: pbPlugin.NewPluginClient(conn), + mem: make(map[string]string), + keys: make([]string, 0), + values: make([]string, 0), + } + name := spec.Name + table := &schema.Table{ + Name: stateTablePrefix + name, + Columns: []schema.Column{ + { + Name: keyColumn, + Type: arrow.BinaryTypes.String, + PrimaryKey: true, + }, + { + Name: valueColumn, + Type: arrow.BinaryTypes.String, + }, + }, + } + tables := schema.Tables{table} + c.encodedTables, err = tables.ToArrowSchemas().Encode() + if err != nil { + return nil, err + } + + if _, err := c.client.Init(ctx, &pbPlugin.Init_Request{ + Spec: spec.Spec, + }); err != nil { + return nil, err + } + + if _, err := c.client.Migrate(ctx, &pbPlugin.Migrate_Request{ + Tables: c.encodedTables, + MigrateMode: pbPlugin.MIGRATE_MODE_SAFE, + }); err != nil { + return nil, err + } + + syncClient, err := c.client.Sync(ctx, &pbPlugin.Sync_Request{ + Tables: []string{stateTablePrefix + name}, + }) + if err != nil { + return nil, err + } + for { + res, err := syncClient.Recv() + if err != nil { + if err == io.EOF { + break + } + return nil, err + } + rdr, err := ipc.NewReader(bytes.NewReader(res.Resource)) + if err != nil { + return nil, err + } + for { + record, err := rdr.Read() + if err != nil { + if err == io.EOF { + break + } + return nil, err + } + keys := record.Columns()[0].(*array.String) + values := record.Columns()[1].(*array.String) + for i := 0; i < keys.Len(); i++ { + c.mem[keys.Value(i)] = values.Value(i) + } + } + } + return c, nil +} + + +func (c *ClientV3) SetKey(ctx context.Context, key string, value string) error { + c.mem[key] = value + return nil +} + +func (c *ClientV3) flush(ctx context.Context) error { + bldr := array.NewRecordBuilder(memory.DefaultAllocator, nil) + for k, v := range c.mem { + bldr.Field(0).(*array.StringBuilder).Append(k) + bldr.Field(1).(*array.StringBuilder).Append(v) + } + rec := bldr.NewRecord() + var buf bytes.Buffer + wrtr := ipc.NewWriter(&buf, ipc.WithSchema(rec.Schema())) + if err := wrtr.Write(rec); err != nil { + return err + } + if err := wrtr.Close(); err != nil { + return err + } + writeClient, err := c.client.Write(ctx) + if err != nil { + return err + } + if err := writeClient.Send(&pbPlugin.Write_Request{ + WriteMode: pbPlugin.WRITE_MODE_WRITE_MODE_OVERWRITE, + }); err != nil { + return err + } + if err := writeClient.Send(&pbPlugin.Write_Request{ + Resource: buf.Bytes(), + }); err != nil { + return err + } + if _, err := writeClient.CloseAndRecv(); err != nil { + return err + } + return nil +} + +func (c *ClientV3) GetKey(ctx context.Context, key string) (string, error) { + if val, ok := c.mem[key]; ok { + return val, nil + } + return "", fmt.Errorf("key not found") +} \ No newline at end of file diff --git a/internal/state/state.go b/internal/state/state.go deleted file mode 100644 index 1dc00b79f9..0000000000 --- a/internal/state/state.go +++ /dev/null @@ -1,27 +0,0 @@ -package state - -import "context" - -type Client struct { - // managedPlugin managedplugin.Client -} - -// func NewState(ctx context.Context, managedPlugin managedplugin.Client) *Client { -// return &Client{ -// managedPlugin: managedPlugin, -// } -// c := pbPlugin.NewPluginClient(managedPlugin.Conn) -// c.Write(ctx, ) -// } - -func NewState(spec any) *Client { - return &Client{} -} - -func (* Client) SetKey(ctx context.Context, key string, value string) error { - return nil -} - -func (* Client) GetKey(ctx context.Context, key string) (string, error) { - return "", nil -} \ No newline at end of file diff --git a/plugin/memdb.go b/plugin/memdb.go index 4a99073935..8c23b430a3 100644 --- a/plugin/memdb.go +++ b/plugin/memdb.go @@ -84,7 +84,11 @@ func (c *client) ID() string { return "testDestinationMemDB" } -func (c *client) Sync(ctx context.Context, res chan<- arrow.Record) error { +func (c *client) NewManagedSyncClient(context.Context, SyncOptions) (ManagedSyncClient, error) { + return nil, fmt.Errorf("not supported") +} + +func (c *client) Sync(ctx context.Context, options SyncOptions, res chan<- arrow.Record) error { c.memoryDBLock.RLock() for tableName := range c.memoryDB { for _, row := range c.memoryDB[tableName] { diff --git a/plugin/options.go b/plugin/options.go index fa2b3b53f2..6ad134b843 100644 --- a/plugin/options.go +++ b/plugin/options.go @@ -3,6 +3,7 @@ package plugin import ( "bytes" "context" + "fmt" "time" "github.com/cloudquery/plugin-sdk/v4/schema" @@ -23,6 +24,32 @@ func (m MigrateMode) String() string { return migrateModeStrings[m] } +type Registry int + +const ( + RegistryGithub Registry = iota + RegistryLocal + RegistryGrpc +) + +func (r Registry) String() string { + return [...]string{"github", "local", "grpc"}[r] +} + +func RegistryFromString(s string) (Registry, error) { + switch s { + case "github": + return RegistryGithub, nil + case "local": + return RegistryLocal, nil + case "grpc": + return RegistryGrpc, nil + default: + return RegistryGithub, fmt.Errorf("unknown registry %s", s) + } +} + + type WriteMode int const ( diff --git a/plugin/plugin.go b/plugin/plugin.go index 754fd424d6..aa456bdaac 100644 --- a/plugin/plugin.go +++ b/plugin/plugin.go @@ -29,9 +29,13 @@ type NewExecutionClientFunc func(context.Context, zerolog.Logger, specs.Source, type NewClientFunc func(context.Context, zerolog.Logger, any) (Client, error) -type Client interface { +type ManagedSyncClient interface { ID() string - Sync(ctx context.Context, res chan<- arrow.Record) error +} + +type Client interface { + NewManagedSyncClient(ctx context.Context, options SyncOptions) (ManagedSyncClient, error) + Sync(ctx context.Context, options SyncOptions, res chan<- arrow.Record) error Migrate(ctx context.Context, tables schema.Tables, migrateMode MigrateMode) error WriteTableBatch(ctx context.Context, table *schema.Table, writeMode WriteMode, data []arrow.Record) error Write(ctx context.Context, tables schema.Tables, writeMode WriteMode, res <-chan arrow.Record) error @@ -42,15 +46,15 @@ type Client interface { type UnimplementedWriter struct{} -func (UnimplementedWriter) Migrate(ctx context.Context, tables schema.Tables) error { +func (UnimplementedWriter) Migrate(ctx context.Context, tables schema.Tables, migrateMode MigrateMode) error { return fmt.Errorf("not implemented") } -func (UnimplementedWriter) Write(ctx context.Context, tables schema.Tables, res <-chan arrow.Record) error { +func (UnimplementedWriter) Write(ctx context.Context, tables schema.Tables, writeMode WriteMode, res <-chan arrow.Record) error { return fmt.Errorf("not implemented") } -func (UnimplementedWriter) WriteTableBatch(ctx context.Context, table *schema.Table, data []arrow.Record) error { +func (UnimplementedWriter) WriteTableBatch(ctx context.Context, table *schema.Table, writeMode WriteMode, data []arrow.Record) error { return fmt.Errorf("not implemented") } @@ -60,7 +64,7 @@ func (UnimplementedWriter) DeleteStale(ctx context.Context, tables schema.Tables type UnimplementedSync struct{} -func (UnimplementedSync) Sync(ctx context.Context, res chan<- arrow.Record) error { +func (UnimplementedSync) Sync(ctx context.Context, options SyncOptions, res chan<- arrow.Record) error { return fmt.Errorf("not implemented") } diff --git a/plugin/plugin_managed_sync.go b/plugin/plugin_managed_sync.go new file mode 100644 index 0000000000..2f52685513 --- /dev/null +++ b/plugin/plugin_managed_sync.go @@ -0,0 +1,45 @@ +package plugin + +import ( + "context" + "fmt" + "time" + + "github.com/apache/arrow/go/v13/arrow" + "github.com/apache/arrow/go/v13/arrow/array" + "github.com/apache/arrow/go/v13/arrow/memory" + "github.com/cloudquery/plugin-sdk/v4/scalar" + "github.com/cloudquery/plugin-sdk/v4/schema" +) + +func (p *Plugin) managedSync(ctx context.Context, sourceName string, syncTime time.Time, options SyncOptions, res chan<- arrow.Record) error { + if len(p.sessionTables) == 0 { + return fmt.Errorf("no tables to sync - please check your spec 'tables' and 'skip_tables' settings") + } + + managedClient, err := p.client.NewManagedSyncClient(ctx, options) + if err != nil { + return fmt.Errorf("failed to create managed sync client: %w", err) + } + + resources := make(chan *schema.Resource) + go func() { + defer close(resources) + switch options.Scheduler { + case SchedulerDFS: + p.syncDfs(ctx, options, managedClient, p.sessionTables, resources) + case SchedulerRoundRobin: + p.syncRoundRobin(ctx, options, managedClient, p.sessionTables, resources) + default: + panic(fmt.Errorf("unknown scheduler %s", options.Scheduler)) + } + }() + for resource := range resources { + vector := resource.GetValues() + bldr := array.NewRecordBuilder(memory.DefaultAllocator, resource.Table.ToArrowSchema()) + scalar.AppendToRecordBuilder(bldr, vector) + rec := bldr.NewRecord() + res <- rec + } + return nil +} \ No newline at end of file diff --git a/plugin/plugin_reader.go b/plugin/plugin_reader.go index 8fadf11f6a..bb59759c0c 100644 --- a/plugin/plugin_reader.go +++ b/plugin/plugin_reader.go @@ -6,18 +6,20 @@ import ( "time" "github.com/apache/arrow/go/v13/arrow" - "github.com/apache/arrow/go/v13/arrow/array" - "github.com/apache/arrow/go/v13/arrow/memory" - "github.com/cloudquery/plugin-sdk/v4/scalar" "github.com/cloudquery/plugin-sdk/v4/schema" + "github.com/cloudquery/plugin-sdk/v4/state" + "github.com/google/uuid" ) + + type SyncOptions struct { Tables []string SkipTables []string Concurrency int64 Scheduler Scheduler DeterministicCQID bool + StateBackend state.Client } // Tables returns all tables supported by this source plugin @@ -52,6 +54,10 @@ func (p *Plugin) Read(ctx context.Context, table *schema.Table, sourceName strin return p.client.Read(ctx, table, sourceName, res) } +func (p *Plugin) Acknowledge(ctx context.Context, recordUUID uuid.UUID) error { + return nil +} + func (p *Plugin) syncAll(ctx context.Context, sourceName string, syncTime time.Time, options SyncOptions) ([]arrow.Record, error) { var err error ch := make(chan arrow.Record) @@ -68,40 +74,21 @@ func (p *Plugin) syncAll(ctx context.Context, sourceName string, syncTime time.T } // Sync is syncing data from the requested tables in spec to the given channel -func (p *Plugin) Sync(ctx context.Context, sourceName string, syncTime time.Time, syncOptions SyncOptions, res chan<- arrow.Record) error { +func (p *Plugin) Sync(ctx context.Context, sourceName string, syncTime time.Time, options SyncOptions, res chan<- arrow.Record) error { if !p.mu.TryLock() { return fmt.Errorf("plugin already in use") } defer p.mu.Unlock() p.syncTime = syncTime - startTime := time.Now() + if p.unmanagedSync { - if err := p.client.Sync(ctx, res); err != nil { + if err := p.client.Sync(ctx, options, res); err != nil { return fmt.Errorf("failed to sync unmanaged client: %w", err) } } else { - if len(p.sessionTables) == 0 { - return fmt.Errorf("no tables to sync - please check your spec 'tables' and 'skip_tables' settings") - } - resources := make(chan *schema.Resource) - go func() { - defer close(resources) - switch syncOptions.Scheduler { - case SchedulerDFS: - p.syncDfs(ctx, syncOptions, p.client, p.sessionTables, resources) - case SchedulerRoundRobin: - p.syncRoundRobin(ctx, syncOptions, p.client, p.sessionTables, resources) - default: - panic(fmt.Errorf("unknown scheduler %s", syncOptions.Scheduler)) - } - }() - for resource := range resources { - vector := resource.GetValues() - bldr := array.NewRecordBuilder(memory.DefaultAllocator, resource.Table.ToArrowSchema()) - scalar.AppendToRecordBuilder(bldr, vector) - rec := bldr.NewRecord() - res <- rec + if err := p.managedSync(ctx, sourceName, syncTime, options, res); err != nil { + return fmt.Errorf("failed to sync managed client: %w", err) } } diff --git a/plugin/plugin_writer.go b/plugin/plugin_writer.go index 4f13e1bdab..af37b8df48 100644 --- a/plugin/plugin_writer.go +++ b/plugin/plugin_writer.go @@ -10,6 +10,9 @@ import ( ) func (p *Plugin) Migrate(ctx context.Context, tables schema.Tables, migrateMode MigrateMode) error { + if p.client == nil { + return fmt.Errorf("plugin is not initialized") + } return p.client.Migrate(ctx, tables, migrateMode) } diff --git a/plugin/scheduler_dfs.go b/plugin/scheduler_dfs.go index ae074503ab..bd87c50aeb 100644 --- a/plugin/scheduler_dfs.go +++ b/plugin/scheduler_dfs.go @@ -14,7 +14,7 @@ import ( "golang.org/x/sync/semaphore" ) -func (p *Plugin) syncDfs(ctx context.Context, options SyncOptions, client Client, tables schema.Tables, resolvedResources chan<- *schema.Resource) { +func (p *Plugin) syncDfs(ctx context.Context, options SyncOptions, client ManagedSyncClient, tables schema.Tables, resolvedResources chan<- *schema.Resource) { // This is very similar to the concurrent web crawler problem with some minor changes. // We are using DFS to make sure memory usage is capped at O(h) where h is the height of the tree. tableConcurrency := max(uint64(options.Concurrency/minResourceConcurrency), minTableConcurrency) diff --git a/plugin/scheduler_round_robin.go b/plugin/scheduler_round_robin.go index b4c7592fcf..a0be17938d 100644 --- a/plugin/scheduler_round_robin.go +++ b/plugin/scheduler_round_robin.go @@ -13,7 +13,7 @@ type tableClient struct { client schema.ClientMeta } -func (p *Plugin) syncRoundRobin(ctx context.Context, options SyncOptions, client Client, tables schema.Tables, resolvedResources chan<- *schema.Resource) { +func (p *Plugin) syncRoundRobin(ctx context.Context, options SyncOptions, client ManagedSyncClient, tables schema.Tables, resolvedResources chan<- *schema.Resource) { tableConcurrency := max(uint64(options.Concurrency/minResourceConcurrency), minTableConcurrency) resourceConcurrency := tableConcurrency * minResourceConcurrency diff --git a/plugin/state.go b/plugin/state.go new file mode 100644 index 0000000000..6831e406a6 --- /dev/null +++ b/plugin/state.go @@ -0,0 +1 @@ +package plugin \ No newline at end of file diff --git a/serve/plugin.go b/serve/plugin.go index f4dbfe9e5b..483312b871 100644 --- a/serve/plugin.go +++ b/serve/plugin.go @@ -15,8 +15,10 @@ import ( pbDestinationV0 "github.com/cloudquery/plugin-pb-go/pb/destination/v0" pbDestinationV1 "github.com/cloudquery/plugin-pb-go/pb/destination/v1" pbdiscoveryv0 "github.com/cloudquery/plugin-pb-go/pb/discovery/v0" + pbdiscoveryv1 "github.com/cloudquery/plugin-pb-go/pb/discovery/v1" pbv3 "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" discoveryServerV0 "github.com/cloudquery/plugin-sdk/v4/internal/servers/discovery/v0" + discoveryServerV1 "github.com/cloudquery/plugin-sdk/v4/internal/servers/discovery/v1" serverDestinationV0 "github.com/cloudquery/plugin-sdk/v4/internal/servers/destination/v0" serverDestinationV1 "github.com/cloudquery/plugin-sdk/v4/internal/servers/destination/v1" @@ -159,6 +161,7 @@ func (s *PluginServe) newCmdPluginServe() *cobra.Command { pbv3.RegisterPluginServer(grpcServer, &serversv3.Server{ Plugin: s.plugin, Logger: logger, + NoSentry: noSentry, }) if s.destinationV0V1Server { pbDestinationV1.RegisterDestinationServer(grpcServer, &serverDestinationV1.Server{ @@ -173,6 +176,9 @@ func (s *PluginServe) newCmdPluginServe() *cobra.Command { pbdiscoveryv0.RegisterDiscoveryServer(grpcServer, &discoveryServerV0.Server{ Versions: []string{"v0", "v1", "v2", "v3"}, }) + pbdiscoveryv1.RegisterDiscoveryServer(grpcServer, &discoveryServerV1.Server{ + Versions: []uint64{0,1,2,3}, + }) version := s.plugin.Version() diff --git a/serve/plugin_test.go b/serve/plugin_test.go index 9135012f0c..e09308b704 100644 --- a/serve/plugin_test.go +++ b/serve/plugin_test.go @@ -58,6 +58,10 @@ func (*testExecutionClient) Close(ctx context.Context) error { return nil } +func (c *testExecutionClient) NewManagedSyncClient(ctx context.Context, options plugin.SyncOptions) (plugin.ManagedSyncClient, error) { + return c, nil +} + func newTestExecutionClient(context.Context, zerolog.Logger, any) (plugin.Client, error) { return &testExecutionClient{}, nil } diff --git a/serve/state_v3_test.go b/serve/state_v3_test.go new file mode 100644 index 0000000000..d1442c2e88 --- /dev/null +++ b/serve/state_v3_test.go @@ -0,0 +1,57 @@ +package serve + +import ( + "context" + "sync" + "testing" + + "github.com/cloudquery/plugin-sdk/v4/internal/state" + "github.com/cloudquery/plugin-sdk/v4/plugin" + "google.golang.org/grpc" + "google.golang.org/grpc/credentials/insecure" +) + +func TestStateV3(t *testing.T) { + p := plugin.NewPlugin("memdb", "v1.0.0", plugin.NewMemDBClient) + srv := Plugin(p, WithArgs("serve"), WithTestListener()) + ctx := context.Background() + ctx, cancel := context.WithCancel(ctx) + var wg sync.WaitGroup + wg.Add(1) + var serverErr error + go func() { + defer wg.Done() + serverErr = srv.Serve(ctx) + }() + defer func() { + cancel() + wg.Wait() + }() + + // https://stackoverflow.com/questions/42102496/testing-a-grpc-service + conn, err := grpc.DialContext(ctx, "bufnet", grpc.WithContextDialer(srv.bufPluginDialer), grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithBlock()) + if err != nil { + t.Fatalf("Failed to dial bufnet: %v", err) + } + + stateClient, err := state.NewClient(ctx, "test", conn) + if err != nil { + t.Fatalf("Failed to create state client: %v", err) + } + if err := stateClient.SetKey(ctx, "testKey", "testValue"); err != nil { + t.Fatalf("Failed to set key: %v", err) + } + key, err := stateClient.GetKey(ctx, "testKey") + if err != nil { + t.Fatalf("Failed to get key: %v", err) + } + if key != "testValue" { + t.Fatalf("Unexpected key value: %v", key) + } + + cancel() + wg.Wait() + if serverErr != nil { + t.Fatal(serverErr) + } +} \ No newline at end of file diff --git a/state/state.go b/state/state.go new file mode 100644 index 0000000000..e92423df3c --- /dev/null +++ b/state/state.go @@ -0,0 +1,8 @@ +package state + +import "context" + +type Client interface { + SetKey(ctx context.Context, key string, value string) error + GetKey(ctx context.Context, key string) (string, error) +} \ No newline at end of file From 6f85bf7ec372935f018497207dbbb3b99a086a98 Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Sun, 4 Jun 2023 22:25:54 +0300 Subject: [PATCH 059/125] more wip --- backend/backend.go | 12 -- internal/backends/local/local.go | 157 ------------------ internal/backends/local/spec.go | 12 -- internal/backends/nop/nop.go | 23 --- .../servers/destination/v0/destinations.go | 4 +- .../servers/destination/v1/destinations.go | 2 +- internal/servers/plugin/v3/plugin.go | 25 ++- internal/servers/plugin/v3/state.go | 23 ++- plugin/memdb_test.go | 34 ++-- plugin/options.go | 5 +- plugin/plugin.go | 19 +-- plugin/plugin_managed_source_test.go | 5 +- plugin/plugin_managed_sync.go | 5 +- plugin/plugin_reader.go | 97 +++++++---- plugin/plugin_test.go | 12 +- plugin/state.go | 1 - plugin/testing_overwrite_deletestale.go | 30 +++- plugin/testing_sync.go | 3 +- plugin/testing_write.go | 4 +- plugin/testing_write_append.go | 6 +- plugin/testing_write_migrate.go | 15 +- plugin/testing_write_overwrite.go | 12 +- serve/destination_v0_test.go | 4 +- serve/destination_v1_test.go | 4 +- serve/plugin.go | 6 +- serve/state_v3_test.go | 2 +- state/state.go | 2 +- 27 files changed, 194 insertions(+), 330 deletions(-) delete mode 100644 backend/backend.go delete mode 100644 internal/backends/local/local.go delete mode 100644 internal/backends/local/spec.go delete mode 100644 internal/backends/nop/nop.go delete mode 100644 plugin/state.go diff --git a/backend/backend.go b/backend/backend.go deleted file mode 100644 index fc4e639233..0000000000 --- a/backend/backend.go +++ /dev/null @@ -1,12 +0,0 @@ -package backend - -import "context" - -type Backend interface { - // Set sets the value for the given table and client id. - Set(ctx context.Context, table, clientID, value string) error - // Get returns the value for the given table and client id. - Get(ctx context.Context, table, clientID string) (string, error) - // Close closes the backend. - Close(ctx context.Context) error -} diff --git a/internal/backends/local/local.go b/internal/backends/local/local.go deleted file mode 100644 index f593260dde..0000000000 --- a/internal/backends/local/local.go +++ /dev/null @@ -1,157 +0,0 @@ -package local - -import ( - "context" - "encoding/json" - "fmt" - "io" - "os" - "path" - "strings" - "sync" - - "github.com/cloudquery/plugin-pb-go/specs/v0" -) - -type Local struct { - sourceName string - spec Spec - tables map[string]entries // table -> key -> value - tablesLock sync.RWMutex -} - -type entries map[string]string - -func New(sourceSpec specs.Source) (*Local, error) { - spec := Spec{} - err := sourceSpec.UnmarshalBackendSpec(&spec) - if err != nil { - return nil, err - } - spec.SetDefaults() - - l := &Local{ - sourceName: sourceSpec.Name, - spec: spec, - } - tables, err := l.loadPreviousState() - if err != nil { - return nil, err - } - if tables == nil { - tables = map[string]entries{} - } - l.tables = tables - return l, nil -} - -func (l *Local) loadPreviousState() (map[string]entries, error) { - files, err := os.ReadDir(l.spec.Path) - if os.IsNotExist(err) { - return nil, nil - } - var tables = map[string]entries{} - for _, f := range files { - if f.IsDir() || !f.Type().IsRegular() { - continue - } - name := f.Name() - if !strings.HasSuffix(name, ".json") || !strings.HasPrefix(name, l.sourceName+"-") { - continue - } - table, kv, err := l.readFile(name) - if err != nil { - return nil, err - } - tables[table] = kv - } - return tables, nil -} - -func (l *Local) readFile(name string) (table string, kv entries, err error) { - p := path.Join(l.spec.Path, name) - f, err := os.Open(p) - if err != nil { - return "", nil, fmt.Errorf("failed to open state file: %w", err) - } - b, err := io.ReadAll(f) - if err != nil { - return "", nil, fmt.Errorf("failed to read state file: %w", err) - } - err = f.Close() - if err != nil { - return "", nil, fmt.Errorf("failed to close state file: %w", err) - } - err = json.Unmarshal(b, &kv) - if err != nil { - return "", nil, fmt.Errorf("failed to unmarshal state file: %w", err) - } - table = strings.TrimPrefix(strings.TrimSuffix(name, ".json"), l.sourceName+"-") - return table, kv, nil -} - -func (l *Local) Get(_ context.Context, table, clientID string) (string, error) { - l.tablesLock.RLock() - defer l.tablesLock.RUnlock() - - if _, ok := l.tables[table]; !ok { - return "", nil - } - return l.tables[table][clientID], nil -} - -func (l *Local) Set(_ context.Context, table, clientID, value string) error { - l.tablesLock.Lock() - defer l.tablesLock.Unlock() - - if _, ok := l.tables[table]; !ok { - l.tables[table] = map[string]string{} - } - prev := l.tables[table][clientID] - l.tables[table][clientID] = value - if prev != value { - // only flush if the value changed - return l.flushTable(table, l.tables[table]) - } - return nil -} - -func (l *Local) Close(_ context.Context) error { - l.tablesLock.RLock() - defer l.tablesLock.RUnlock() - - return l.flush() -} - -func (l *Local) flush() error { - for table, kv := range l.tables { - err := l.flushTable(table, kv) - if err != nil { - return err - } - } - return nil -} - -func (l *Local) flushTable(table string, entries entries) error { - if len(entries) == 0 { - return nil - } - - err := os.MkdirAll(l.spec.Path, 0755) - if err != nil { - return fmt.Errorf("failed to create state directory %v: %w", l.spec.Path, err) - } - - b, err := json.MarshalIndent(entries, "", " ") - if err != nil { - return fmt.Errorf("failed to marshal state for table %v: %w", table, err) - } - f := path.Join(l.spec.Path, l.sourceName+"-"+table+".json") - err = os.WriteFile(f, b, 0644) - if err != nil { - return fmt.Errorf("failed to write state for table %v: %w", table, err) - } - - return nil -} diff --git a/internal/backends/local/spec.go b/internal/backends/local/spec.go deleted file mode 100644 index f2b7040c1d..0000000000 --- a/internal/backends/local/spec.go +++ /dev/null @@ -1,12 +0,0 @@ -package local - -type Spec struct { - // Path is the path to the local directory. - Path string `json:"path"` -} - -func (s *Spec) SetDefaults() { - if s.Path == "" { - s.Path = ".cq/state" - } -} diff --git a/internal/backends/nop/nop.go b/internal/backends/nop/nop.go deleted file mode 100644 index 45e713608a..0000000000 --- a/internal/backends/nop/nop.go +++ /dev/null @@ -1,23 +0,0 @@ -package nop - -import "context" - -func New() *Backend { - return &Backend{} -} - -// Backend can be used in cases where no backend is specified to avoid the need to check for nil -// pointers in all resolvers. -type Backend struct{} - -func (*Backend) Set(_ context.Context, _, _, _ string) error { - return nil -} - -func (*Backend) Get(_ context.Context, _, _ string) (string, error) { - return "", nil -} - -func (*Backend) Close(_ context.Context) error { - return nil -} diff --git a/internal/servers/destination/v0/destinations.go b/internal/servers/destination/v0/destinations.go index 93fe380574..7419cf10f2 100644 --- a/internal/servers/destination/v0/destinations.go +++ b/internal/servers/destination/v0/destinations.go @@ -53,7 +53,7 @@ func (s *Server) Configure(ctx context.Context, req *pbBase.Configure_Request) ( case specs.MigrateModeSafe: s.migrateMode = plugin.MigrateModeSafe case specs.MigrateModeForced: - s.migrateMode = plugin.MigrateModeForced + s.migrateMode = plugin.MigrateModeForce } return &pbBase.Configure_Response{}, s.Plugin.Init(ctx, nil) } @@ -84,7 +84,7 @@ func (s *Server) Migrate(ctx context.Context, req *pb.Migrate_Request) (*pb.Migr case specs.MigrateModeSafe: migrateMode = plugin.MigrateModeSafe case specs.MigrateModeForced: - migrateMode = plugin.MigrateModeForced + migrateMode = plugin.MigrateModeForce default: return nil, status.Errorf(codes.InvalidArgument, "invalid migrate mode: %v", s.spec.MigrateMode) } diff --git a/internal/servers/destination/v1/destinations.go b/internal/servers/destination/v1/destinations.go index c65c6b4256..d53e5e2ee9 100644 --- a/internal/servers/destination/v1/destinations.go +++ b/internal/servers/destination/v1/destinations.go @@ -46,7 +46,7 @@ func (s *Server) Configure(ctx context.Context, req *pb.Configure_Request) (*pb. case specs.MigrateModeSafe: s.migrateMode = plugin.MigrateModeSafe case specs.MigrateModeForced: - s.migrateMode = plugin.MigrateModeForced + s.migrateMode = plugin.MigrateModeForce } return &pb.Configure_Response{}, s.Plugin.Init(ctx, s.spec.Spec) } diff --git a/internal/servers/plugin/v3/plugin.go b/internal/servers/plugin/v3/plugin.go index b50de13253..53b02b1cd5 100644 --- a/internal/servers/plugin/v3/plugin.go +++ b/internal/servers/plugin/v3/plugin.go @@ -28,10 +28,10 @@ const MaxMsgSize = 100 * 1024 * 1024 // 100 MiB type Server struct { pb.UnimplementedPluginServer - Plugin *plugin.Plugin - Logger zerolog.Logger + Plugin *plugin.Plugin + Logger zerolog.Logger Directory string - NoSentry bool + NoSentry bool } func (s *Server) GetStaticTables(context.Context, *pb.GetStaticTables_Request) (*pb.GetStaticTables_Response, error) { @@ -93,7 +93,7 @@ func (s *Server) Sync(req *pb.Sync_Request, stream pb.Plugin_SyncServer) error { syncOptions.Scheduler = plugin.SchedulerRoundRobin } - sourceName := req.SourceName + // sourceName := req.SourceName if req.StateBackend != nil { opts := []managedplugin.Option{ @@ -104,9 +104,9 @@ func (s *Server) Sync(req *pb.Sync_Request, stream pb.Plugin_SyncServer) error { opts = append(opts, managedplugin.WithNoSentry()) } statePlugin, err := managedplugin.NewClient(ctx, managedplugin.Config{ - Path: req.StateBackend.Path, + Path: req.StateBackend.Path, Registry: managedplugin.Registry(req.StateBackend.Registry), - Version: req.StateBackend.Version, + Version: req.StateBackend.Version, }, opts...) if err != nil { return status.Errorf(codes.Internal, "failed to create state plugin: %v", err) @@ -117,10 +117,17 @@ func (s *Server) Sync(req *pb.Sync_Request, stream pb.Plugin_SyncServer) error { } syncOptions.StateBackend = stateClient } + if req.SyncTime != nil { + syncOptions.SyncTime = req.SyncTime.AsTime() + } + + if req.SourceName != "" { + syncOptions.SourceName = req.SourceName + } go func() { defer close(records) - err := s.Plugin.Sync(ctx, sourceName, req.SyncTime.AsTime(), syncOptions, records) + err := s.Plugin.Sync(ctx, syncOptions, records) if err != nil { syncErr = fmt.Errorf("failed to sync records: %w", err) } @@ -196,7 +203,7 @@ func (s *Server) Migrate(ctx context.Context, req *pb.Migrate_Request) (*pb.Migr case pb.MIGRATE_MODE_SAFE: migrateMode = plugin.MigrateModeSafe case pb.MIGRATE_MODE_FORCE: - migrateMode = plugin.MigrateModeForced + migrateMode = plugin.MigrateModeForce } return &pb.Migrate_Response{}, s.Plugin.Migrate(ctx, tables, migrateMode) } @@ -345,4 +352,4 @@ func setCQIDAsPrimaryKeysForTables(tables schema.Tables) { func (s *Server) Close(ctx context.Context, _ *pb.Close_Request) (*pb.Close_Response, error) { return &pb.Close_Response{}, s.Plugin.Close(ctx) -} \ No newline at end of file +} diff --git a/internal/servers/plugin/v3/state.go b/internal/servers/plugin/v3/state.go index 4b7a43c297..be152297b7 100644 --- a/internal/servers/plugin/v3/state.go +++ b/internal/servers/plugin/v3/state.go @@ -23,11 +23,11 @@ const keyColumn = "key" const valueColumn = "value" type ClientV3 struct { - client pbPlugin.PluginClient + client pbPlugin.PluginClient encodedTables [][]byte - mem map[string]string - keys []string - values []string + mem map[string]string + keys []string + values []string } func newStateClient(ctx context.Context, conn *grpc.ClientConn, spec pbPlugin.StateBackendSpec) (state.Client, error) { @@ -42,8 +42,8 @@ func newStateClient(ctx context.Context, conn *grpc.ClientConn, spec pbPlugin.St c := &ClientV3{ client: pbPlugin.NewPluginClient(conn), - mem: make(map[string]string), - keys: make([]string, 0), + mem: make(map[string]string), + keys: make([]string, 0), values: make([]string, 0), } name := spec.Name @@ -51,8 +51,8 @@ func newStateClient(ctx context.Context, conn *grpc.ClientConn, spec pbPlugin.St Name: stateTablePrefix + name, Columns: []schema.Column{ { - Name: keyColumn, - Type: arrow.BinaryTypes.String, + Name: keyColumn, + Type: arrow.BinaryTypes.String, PrimaryKey: true, }, { @@ -74,7 +74,7 @@ func newStateClient(ctx context.Context, conn *grpc.ClientConn, spec pbPlugin.St } if _, err := c.client.Migrate(ctx, &pbPlugin.Migrate_Request{ - Tables: c.encodedTables, + Tables: c.encodedTables, MigrateMode: pbPlugin.MIGRATE_MODE_SAFE, }); err != nil { return nil, err @@ -116,7 +116,6 @@ func newStateClient(ctx context.Context, conn *grpc.ClientConn, spec pbPlugin.St return c, nil } - func (c *ClientV3) SetKey(ctx context.Context, key string, value string) error { c.mem[key] = value return nil @@ -157,9 +156,9 @@ func (c *ClientV3) flush(ctx context.Context) error { return nil } -func (c *ClientV3) GetKey(ctx context.Context, key string) (string, error) { +func (c *ClientV3) GetKey(ctx context.Context, key string) (string, error) { if val, ok := c.mem[key]; ok { return val, nil } return "", fmt.Errorf("key not found") -} \ No newline at end of file +} diff --git a/plugin/memdb_test.go b/plugin/memdb_test.go index d89a70ac0d..2f9f54a506 100644 --- a/plugin/memdb_test.go +++ b/plugin/memdb_test.go @@ -12,19 +12,19 @@ import ( ) var migrateStrategyOverwrite = MigrateStrategy{ - AddColumn: pbPlugin.WriteSpec_FORCE, - AddColumnNotNull: pbPlugin.WriteSpec_FORCE, - RemoveColumn: pbPlugin.WriteSpec_FORCE, - RemoveColumnNotNull: pbPlugin.WriteSpec_FORCE, - ChangeColumn: pbPlugin.WriteSpec_FORCE, + AddColumn: MigrateModeForce, + AddColumnNotNull: MigrateModeForce, + RemoveColumn: MigrateModeForce, + RemoveColumnNotNull: MigrateModeForce, + ChangeColumn: MigrateModeForce, } var migrateStrategyAppend = MigrateStrategy{ - AddColumn: pbPlugin.WriteSpec_FORCE, - AddColumnNotNull: pbPlugin.WriteSpec_FORCE, - RemoveColumn: pbPlugin.WriteSpec_FORCE, - RemoveColumnNotNull: pbPlugin.WriteSpec_FORCE, - ChangeColumn: pbPlugin.WriteSpec_FORCE, + AddColumn: MigrateModeForce, + AddColumnNotNull: MigrateModeForce, + RemoveColumn: MigrateModeForce, + RemoveColumnNotNull: MigrateModeForce, + ChangeColumn: MigrateModeForce, } func TestPluginUnmanagedClient(t *testing.T) { @@ -33,7 +33,7 @@ func TestPluginUnmanagedClient(t *testing.T) { func() *Plugin { return NewPlugin("test", "development", NewMemDBClient) }, - pbPlugin.Spec{}, + nil, PluginTestSuiteTests{ MigrateStrategyOverwrite: migrateStrategyOverwrite, MigrateStrategyAppend: migrateStrategyAppend, @@ -46,7 +46,7 @@ func TestPluginManagedClient(t *testing.T) { func() *Plugin { return NewPlugin("test", "development", NewMemDBClient, WithManagedWriter()) }, - pbPlugin.Spec{}, + nil, PluginTestSuiteTests{ MigrateStrategyOverwrite: migrateStrategyOverwrite, MigrateStrategyAppend: migrateStrategyAppend, @@ -59,7 +59,7 @@ func TestPluginManagedClientWithSmallBatchSize(t *testing.T) { return NewPlugin("test", "development", NewMemDBClient, WithManagedWriter(), WithDefaultBatchSize(1), WithDefaultBatchSizeBytes(1)) - }, pbPlugin.Spec{}, + }, nil, PluginTestSuiteTests{ MigrateStrategyOverwrite: migrateStrategyOverwrite, MigrateStrategyAppend: migrateStrategyAppend, @@ -73,7 +73,7 @@ func TestPluginManagedClientWithLargeBatchSize(t *testing.T) { WithDefaultBatchSize(100000000), WithDefaultBatchSizeBytes(100000000)) }, - pbPlugin.Spec{}, + nil, PluginTestSuiteTests{ MigrateStrategyOverwrite: migrateStrategyOverwrite, MigrateStrategyAppend: migrateStrategyAppend, @@ -99,7 +99,7 @@ func TestPluginManagedClientWithCQPKs(t *testing.T) { func TestPluginOnNewError(t *testing.T) { ctx := context.Background() p := NewPlugin("test", "development", NewMemDBClientErrOnNew) - err := p.Init(ctx, pbPlugin.Spec{}) + err := p.Init(ctx, nil) if err == nil { t.Fatal("expected error") @@ -110,9 +110,7 @@ func TestOnWriteError(t *testing.T) { ctx := context.Background() newClientFunc := GetNewClient(WithErrOnWrite()) p := NewPlugin("test", "development", newClientFunc) - if err := p.Init(ctx, pbPlugin.Spec{ - WriteSpec: &pbPlugin.WriteSpec{}, - }); err != nil { + if err := p.Init(ctx, nil); err != nil { t.Fatal(err) } table := schema.TestTable("test", schema.TestSourceOptions{}) diff --git a/plugin/options.go b/plugin/options.go index 6ad134b843..11841fa53b 100644 --- a/plugin/options.go +++ b/plugin/options.go @@ -13,11 +13,11 @@ type MigrateMode int const ( MigrateModeSafe MigrateMode = iota - MigrateModeForced + MigrateModeForce ) var ( - migrateModeStrings = []string{"safe", "forced"} + migrateModeStrings = []string{"safe", "force"} ) func (m MigrateMode) String() string { @@ -49,7 +49,6 @@ func RegistryFromString(s string) (Registry, error) { } } - type WriteMode int const ( diff --git a/plugin/plugin.go b/plugin/plugin.go index aa456bdaac..ec5d00b1f1 100644 --- a/plugin/plugin.go +++ b/plugin/plugin.go @@ -7,8 +7,6 @@ import ( "time" "github.com/apache/arrow/go/v13/arrow" - "github.com/cloudquery/plugin-pb-go/specs/v0" - "github.com/cloudquery/plugin-sdk/v4/backend" "github.com/cloudquery/plugin-sdk/v4/caser" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/rs/zerolog" @@ -21,12 +19,6 @@ const ( defaultBatchSizeBytes = 5 * 1024 * 1024 // 5 MiB ) -type Options struct { - Backend backend.Backend -} - -type NewExecutionClientFunc func(context.Context, zerolog.Logger, specs.Source, Options) (schema.ClientMeta, error) - type NewClientFunc func(context.Context, zerolog.Logger, any) (Client, error) type ManagedSyncClient interface { @@ -40,7 +32,7 @@ type Client interface { WriteTableBatch(ctx context.Context, table *schema.Table, writeMode WriteMode, data []arrow.Record) error Write(ctx context.Context, tables schema.Tables, writeMode WriteMode, res <-chan arrow.Record) error DeleteStale(ctx context.Context, tables schema.Tables, sourceName string, syncTime time.Time) error - Read(ctx context.Context, table *schema.Table, sourceName string, res chan<- arrow.Record) error + // Read(ctx context.Context, table *schema.Table, sourceName string, res chan<- arrow.Record) error Close(ctx context.Context) error } @@ -106,8 +98,6 @@ type Plugin struct { client Client // sessionTables are the sessionTables schema.Tables - // backend is the backend used to store the cursor state - backend backend.Backend // spec is the spec the client was initialized with spec any // NoInternalColumns if set to true will not add internal columns to tables such as _cq_id and _cq_parent_id @@ -279,12 +269,5 @@ func (p *Plugin) Close(ctx context.Context) error { return fmt.Errorf("plugin already in use") } defer p.mu.Unlock() - if p.backend != nil { - err := p.backend.Close(ctx) - if err != nil { - return fmt.Errorf("failed to close backend: %w", err) - } - p.backend = nil - } return p.client.Close(ctx) } diff --git a/plugin/plugin_managed_source_test.go b/plugin/plugin_managed_source_test.go index 8520c3a7be..c2071cc977 100644 --- a/plugin/plugin_managed_source_test.go +++ b/plugin/plugin_managed_source_test.go @@ -9,7 +9,6 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/array" pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" - "github.com/cloudquery/plugin-sdk/v4/plugin" "github.com/cloudquery/plugin-sdk/v4/scalar" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/google/go-cmp/cmp" @@ -359,7 +358,7 @@ func (testRand) Read(p []byte) (n int, err error) { func TestManagedSync(t *testing.T) { uuid.SetRand(testRand{}) - for _, scheduler := range plugin.AllSchedulers { + for _, scheduler := range AllSchedulers { for _, tc := range syncTestCases { tc := tc tc.table = tc.table.Copy(nil) @@ -370,7 +369,7 @@ func TestManagedSync(t *testing.T) { } } -func testSyncTable(t *testing.T, tc syncTestCase, scheduler plugin.Scheduler, deterministicCQID bool) { +func testSyncTable(t *testing.T, tc syncTestCase, scheduler Scheduler, deterministicCQID bool) { ctx := context.Background() tables := []*schema.Table{ tc.table, diff --git a/plugin/plugin_managed_sync.go b/plugin/plugin_managed_sync.go index 2f52685513..8d4b113ebb 100644 --- a/plugin/plugin_managed_sync.go +++ b/plugin/plugin_managed_sync.go @@ -3,7 +3,6 @@ package plugin import ( "context" "fmt" - "time" "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/array" @@ -12,7 +11,7 @@ import ( "github.com/cloudquery/plugin-sdk/v4/schema" ) -func (p *Plugin) managedSync(ctx context.Context, sourceName string, syncTime time.Time, options SyncOptions, res chan<- arrow.Record) error { +func (p *Plugin) managedSync(ctx context.Context, options SyncOptions, res chan<- arrow.Record) error { if len(p.sessionTables) == 0 { return fmt.Errorf("no tables to sync - please check your spec 'tables' and 'skip_tables' settings") } @@ -42,4 +41,4 @@ func (p *Plugin) managedSync(ctx context.Context, sourceName string, syncTime ti res <- rec } return nil -} \ No newline at end of file +} diff --git a/plugin/plugin_reader.go b/plugin/plugin_reader.go index bb59759c0c..a01858eace 100644 --- a/plugin/plugin_reader.go +++ b/plugin/plugin_reader.go @@ -8,10 +8,23 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/cloudquery/plugin-sdk/v4/state" - "github.com/google/uuid" + "github.com/rs/zerolog" ) +type Operation int +const ( + OperationEqual Operation = iota + OperationNotEqual + OperationGreaterThan + OperationLessThan +) + +type WhereClause struct { + ColumnName string + Operation Operation + Value string +} type SyncOptions struct { Tables []string @@ -19,7 +32,39 @@ type SyncOptions struct { Concurrency int64 Scheduler Scheduler DeterministicCQID bool - StateBackend state.Client + // SyncTime if specified then this will be add to every table as _sync_time column + SyncTime time.Time + // If spceified then this will be added to every table as _source_name column + SourceName string + StateBackend state.Client +} + +type ReadOnlyClient interface { + NewManagedSyncClient(ctx context.Context, options SyncOptions) (ManagedSyncClient, error) + Sync(ctx context.Context, options SyncOptions, res chan<- arrow.Record) error + Read(ctx context.Context, table *schema.Table, sourceName string, res chan<- arrow.Record) error + Close(ctx context.Context) error +} + +type NewReadOnlyClientFunc func(context.Context, zerolog.Logger, any) (ReadOnlyClient, error) + +// NewReadOnlyPlugin returns a new CloudQuery Plugin with the given name, version and implementation. +// this plugin will only support read operations. For ReadWrite plugin use NewPlugin. +func NewReadOnlyPlugin(name string, version string, newClient NewReadOnlyClientFunc, options ...Option) *Plugin { + newClientWrapper := func(ctx context.Context, logger zerolog.Logger, any any) (Client, error) { + readOnlyClient, err := newClient(ctx, logger, any) + if err != nil { + return nil, err + } + wrapperClient := struct { + ReadOnlyClient + UnimplementedWriter + }{ + ReadOnlyClient: readOnlyClient, + } + return wrapperClient, nil + } + return NewPlugin(name, version, newClientWrapper, options...) } // Tables returns all tables supported by this source plugin @@ -35,35 +80,31 @@ func (p *Plugin) DynamicTables() schema.Tables { return p.sessionTables } -func (p *Plugin) readAll(ctx context.Context, table *schema.Table, sourceName string) ([]arrow.Record, error) { - var readErr error - ch := make(chan arrow.Record) - go func() { - defer close(ch) - readErr = p.Read(ctx, table, sourceName, ch) - }() - // nolint:prealloc - var resources []arrow.Record - for resource := range ch { - resources = append(resources, resource) - } - return resources, readErr -} - -func (p *Plugin) Read(ctx context.Context, table *schema.Table, sourceName string, res chan<- arrow.Record) error { - return p.client.Read(ctx, table, sourceName, res) -} +// func (p *Plugin) readAll(ctx context.Context, table *schema.Table, sourceName string) ([]arrow.Record, error) { +// var readErr error +// ch := make(chan arrow.Record) +// go func() { +// defer close(ch) +// readErr = p.Read(ctx, table, sourceName, ch) +// }() +// // nolint:prealloc +// var resources []arrow.Record +// for resource := range ch { +// resources = append(resources, resource) +// } +// return resources, readErr +// } -func (p *Plugin) Acknowledge(ctx context.Context, recordUUID uuid.UUID) error { - return nil -} +// func (p *Plugin) Read(ctx context.Context, table *schema.Table, sourceName string, res chan<- arrow.Record) error { +// return p.client.Read(ctx, table, sourceName, res) +// } -func (p *Plugin) syncAll(ctx context.Context, sourceName string, syncTime time.Time, options SyncOptions) ([]arrow.Record, error) { +func (p *Plugin) syncAll(ctx context.Context, options SyncOptions) ([]arrow.Record, error) { var err error ch := make(chan arrow.Record) go func() { defer close(ch) - err = p.Sync(ctx, sourceName, syncTime, options, ch) + err = p.Sync(ctx, options, ch) }() // nolint:prealloc var resources []arrow.Record @@ -74,12 +115,12 @@ func (p *Plugin) syncAll(ctx context.Context, sourceName string, syncTime time.T } // Sync is syncing data from the requested tables in spec to the given channel -func (p *Plugin) Sync(ctx context.Context, sourceName string, syncTime time.Time, options SyncOptions, res chan<- arrow.Record) error { +func (p *Plugin) Sync(ctx context.Context, options SyncOptions, res chan<- arrow.Record) error { if !p.mu.TryLock() { return fmt.Errorf("plugin already in use") } defer p.mu.Unlock() - p.syncTime = syncTime + p.syncTime = options.SyncTime startTime := time.Now() if p.unmanagedSync { @@ -87,7 +128,7 @@ func (p *Plugin) Sync(ctx context.Context, sourceName string, syncTime time.Time return fmt.Errorf("failed to sync unmanaged client: %w", err) } } else { - if err := p.managedSync(ctx, sourceName, syncTime, options, res); err != nil { + if err := p.managedSync(ctx, options, res); err != nil { return fmt.Errorf("failed to sync managed client: %w", err) } } diff --git a/plugin/plugin_test.go b/plugin/plugin_test.go index 04993d5b97..6fe3d0aa7e 100644 --- a/plugin/plugin_test.go +++ b/plugin/plugin_test.go @@ -30,7 +30,9 @@ func TestPluginUnmanagedSync(t *testing.T) { if err := p.writeAll(ctx, sourceName, syncTime, WriteModeOverwrite, testRecords); err != nil { t.Fatal(err) } - gotRecords, err := p.readAll(ctx, testTable, "test") + gotRecords, err := p.syncAll(ctx, SyncOptions{ + Tables: []string{testTable.Name}, + }) if err != nil { t.Fatal(err) } @@ -40,7 +42,9 @@ func TestPluginUnmanagedSync(t *testing.T) { if !array.RecordEqual(testRecords[0], gotRecords[0]) { t.Fatal("records are not equal") } - records, err := p.syncAll(ctx, sourceName, syncTime, SyncOptions{}) + records, err := p.syncAll(ctx, SyncOptions{ + Tables: []string{testTable.Name}, + }) if err != nil { t.Fatal(err) } @@ -56,7 +60,9 @@ func TestPluginUnmanagedSync(t *testing.T) { if err := p.DeleteStale(ctx, schema.Tables{testTable}, "test", newSyncTime); err != nil { t.Fatal(err) } - records, err = p.syncAll(ctx, sourceName, syncTime, SyncOptions{}) + records, err = p.syncAll(ctx, SyncOptions{ + Tables: []string{testTable.Name}, + }) if err != nil { t.Fatal(err) } diff --git a/plugin/state.go b/plugin/state.go deleted file mode 100644 index 6831e406a6..0000000000 --- a/plugin/state.go +++ /dev/null @@ -1 +0,0 @@ -package plugin \ No newline at end of file diff --git a/plugin/testing_overwrite_deletestale.go b/plugin/testing_overwrite_deletestale.go index 300900e287..6ac079ff13 100644 --- a/plugin/testing_overwrite_deletestale.go +++ b/plugin/testing_overwrite_deletestale.go @@ -47,7 +47,11 @@ func (*PluginTestSuite) destinationPluginTestWriteOverwriteDeleteStale(ctx conte } sortRecordsBySyncTime(table, resources) - resourcesRead, err := p.readAll(ctx, table, sourceName) + resourcesRead, err := p.syncAll(ctx, SyncOptions{ + Tables: []string{tableName}, + SyncTime: syncTime, + SourceName: sourceName, + }) if err != nil { return fmt.Errorf("failed to read all: %w", err) } @@ -71,7 +75,11 @@ func (*PluginTestSuite) destinationPluginTestWriteOverwriteDeleteStale(ctx conte } // read from incremental table - resourcesRead, err = p.readAll(ctx, incTable, sourceName) + resourcesRead, err = p.syncAll(ctx, SyncOptions{ + Tables: []string{tableName}, + SyncTime: syncTime, + SourceName: sourceName, + }) if err != nil { return fmt.Errorf("failed to read all: %w", err) } @@ -99,7 +107,11 @@ func (*PluginTestSuite) destinationPluginTestWriteOverwriteDeleteStale(ctx conte return fmt.Errorf("failed to write all second time: %w", err) } - resourcesRead, err = p.readAll(ctx, table, sourceName) + resourcesRead, err = p.syncAll(ctx, SyncOptions{ + Tables: []string{tableName}, + SyncTime: secondSyncTime, + SourceName: sourceName, + }) if err != nil { return fmt.Errorf("failed to read all second time: %w", err) } @@ -116,7 +128,11 @@ func (*PluginTestSuite) destinationPluginTestWriteOverwriteDeleteStale(ctx conte return fmt.Errorf("after overwrite expected first resource to be different. diff: %s", diff) } - resourcesRead, err = p.readAll(ctx, table, sourceName) + resourcesRead, err = p.syncAll(ctx, SyncOptions{ + Tables: []string{tableName}, + SyncTime: syncTime, + SourceName: sourceName, + }) if err != nil { return fmt.Errorf("failed to read all second time: %w", err) } @@ -136,7 +152,11 @@ func (*PluginTestSuite) destinationPluginTestWriteOverwriteDeleteStale(ctx conte // we expect the incremental table to still have 3 resources, because delete-stale should // not apply there - resourcesRead, err = p.readAll(ctx, incTable, sourceName) + resourcesRead, err = p.syncAll(ctx, SyncOptions{ + Tables: []string{incTable.Name}, + SyncTime: secondSyncTime, + SourceName: sourceName, + }) if err != nil { return fmt.Errorf("failed to read all from incremental table: %w", err) } diff --git a/plugin/testing_sync.go b/plugin/testing_sync.go index 59b1cd5a9e..01a09c98b6 100644 --- a/plugin/testing_sync.go +++ b/plugin/testing_sync.go @@ -5,7 +5,6 @@ import ( "fmt" "strings" "testing" - "time" "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/array" @@ -37,7 +36,7 @@ func TestPluginSync(t *testing.T, plugin *Plugin, sourceName string, spec any, o go func() { defer close(resourcesChannel) - syncErr = plugin.Sync(context.Background(), sourceName, time.Now(), options, resourcesChannel) + syncErr = plugin.Sync(context.Background(), options, resourcesChannel) }() syncedResources := make([]arrow.Record, 0) diff --git a/plugin/testing_write.go b/plugin/testing_write.go index 501ff39273..e7e50ef76f 100644 --- a/plugin/testing_write.go +++ b/plugin/testing_write.go @@ -227,7 +227,7 @@ func PluginTestSuiteRunner(t *testing.T, newPlugin NewPluginFunc, spec any, test if suite.tests.SkipMigrateOverwriteForce { t.Skip("skipping " + t.Name()) } - migrateMode := MigrateModeForced + migrateMode := MigrateModeForce writeMode := WriteModeOverwrite suite.destinationPluginTestMigrate(ctx, t, newPlugin, logger, migrateMode, writeMode, tests.MigrateStrategyOverwrite, opts) }) @@ -263,7 +263,7 @@ func PluginTestSuiteRunner(t *testing.T, newPlugin NewPluginFunc, spec any, test if suite.tests.SkipMigrateAppendForce { t.Skip("skipping " + t.Name()) } - migrateMode := MigrateModeForced + migrateMode := MigrateModeForce writeMode := WriteModeAppend suite.destinationPluginTestMigrate(ctx, t, newPlugin, logger, migrateMode, writeMode, tests.MigrateStrategyAppend, opts) }) diff --git a/plugin/testing_write_append.go b/plugin/testing_write_append.go index 68efc4ca8b..d4ccdd15d4 100644 --- a/plugin/testing_write_append.go +++ b/plugin/testing_write_append.go @@ -50,7 +50,11 @@ func (s *PluginTestSuite) destinationPluginTestWriteAppend(ctx context.Context, } } - resourcesRead, err := p.readAll(ctx, tables[0], sourceName) + resourcesRead, err := p.syncAll(ctx, SyncOptions{ + Tables: []string{tableName}, + SyncTime: secondSyncTime, + SourceName: sourceName, + }) if err != nil { return fmt.Errorf("failed to read all second time: %w", err) } diff --git a/plugin/testing_write_migrate.go b/plugin/testing_write_migrate.go index d59da7fc8b..978c5951a2 100644 --- a/plugin/testing_write_migrate.go +++ b/plugin/testing_write_migrate.go @@ -56,7 +56,10 @@ func testMigration(ctx context.Context, _ *testing.T, p *Plugin, logger zerolog. stripNullsFromLists(resource2) } - resourcesRead, err := p.readAll(ctx, target, sourceName) + resourcesRead, err := p.syncAll(ctx, SyncOptions{ + Tables: []string{target.Name}, + SourceName: sourceName, + }) if err != nil { return fmt.Errorf("failed to read all: %w", err) } @@ -93,7 +96,7 @@ func (*PluginTestSuite) destinationPluginTestMigrate( testOpts PluginTestSuiteRunnerOptions, ) { t.Run("add_column", func(t *testing.T) { - if strategy.AddColumn == MigrateModeForced && migrateMode == MigrateModeSafe { + if strategy.AddColumn == MigrateModeForce && migrateMode == MigrateModeSafe { t.Skip("skipping as migrate mode is safe") return } @@ -129,7 +132,7 @@ func (*PluginTestSuite) destinationPluginTestMigrate( }) t.Run("add_column_not_null", func(t *testing.T) { - if strategy.AddColumnNotNull == MigrateModeForced && migrateMode == MigrateModeSafe { + if strategy.AddColumnNotNull == MigrateModeForce && migrateMode == MigrateModeSafe { t.Skip("skipping as migrate mode is safe") return } @@ -163,7 +166,7 @@ func (*PluginTestSuite) destinationPluginTestMigrate( }) t.Run("remove_column", func(t *testing.T) { - if strategy.RemoveColumn == MigrateModeForced && migrateMode == MigrateModeSafe { + if strategy.RemoveColumn == MigrateModeForce && migrateMode == MigrateModeSafe { t.Skip("skipping as migrate mode is safe") return } @@ -196,7 +199,7 @@ func (*PluginTestSuite) destinationPluginTestMigrate( }) t.Run("remove_column_not_null", func(t *testing.T) { - if strategy.RemoveColumnNotNull == MigrateModeForced && migrateMode == MigrateModeSafe { + if strategy.RemoveColumnNotNull == MigrateModeForce && migrateMode == MigrateModeSafe { t.Skip("skipping as migrate mode is safe") return } @@ -230,7 +233,7 @@ func (*PluginTestSuite) destinationPluginTestMigrate( }) t.Run("change_column", func(t *testing.T) { - if strategy.ChangeColumn == MigrateModeForced && migrateMode == MigrateModeSafe { + if strategy.ChangeColumn == MigrateModeForce && migrateMode == MigrateModeSafe { t.Skip("skipping as migrate mode is safe") return } diff --git a/plugin/testing_write_overwrite.go b/plugin/testing_write_overwrite.go index 34e89e8b2d..fd851a6e2e 100644 --- a/plugin/testing_write_overwrite.go +++ b/plugin/testing_write_overwrite.go @@ -43,7 +43,11 @@ func (*PluginTestSuite) destinationPluginTestWriteOverwrite(ctx context.Context, if testOpts.IgnoreNullsInLists { stripNullsFromLists(resources) } - resourcesRead, err := p.readAll(ctx, table, sourceName) + resourcesRead, err := p.syncAll(ctx, SyncOptions{ + Tables: []string{tableName}, + SyncTime: syncTime, + SourceName: sourceName, + }) if err != nil { return fmt.Errorf("failed to read all: %w", err) } @@ -85,7 +89,11 @@ func (*PluginTestSuite) destinationPluginTestWriteOverwrite(ctx context.Context, if testOpts.IgnoreNullsInLists { stripNullsFromLists(updatedResource) } - resourcesRead, err = p.readAll(ctx, table, sourceName) + resourcesRead, err = p.syncAll(ctx, SyncOptions{ + Tables: []string{tableName}, + SyncTime: secondSyncTime, + SourceName: sourceName, + }) if err != nil { return fmt.Errorf("failed to read all second time: %w", err) } diff --git a/serve/destination_v0_test.go b/serve/destination_v0_test.go index a1ba81f794..181474c3c4 100644 --- a/serve/destination_v0_test.go +++ b/serve/destination_v0_test.go @@ -130,7 +130,9 @@ func TestDestination(t *testing.T) { // serversDestination table := serversDestination.TableV2ToV3(tableV2) readCh := make(chan arrow.Record, 1) - if err := p.Read(ctx, table, sourceName, readCh); err != nil { + if err := p.Sync(ctx, plugin.SyncOptions{ + Tables: []string{tableName}, + }, readCh); err != nil { t.Fatal(err) } close(readCh) diff --git a/serve/destination_v1_test.go b/serve/destination_v1_test.go index c65320eea1..3f15930022 100644 --- a/serve/destination_v1_test.go +++ b/serve/destination_v1_test.go @@ -130,7 +130,9 @@ func TestDestinationV1(t *testing.T) { } // serversDestination readCh := make(chan arrow.Record, 1) - if err := p.Read(ctx, table, sourceName, readCh); err != nil { + if err := p.Sync(ctx, plugin.SyncOptions{ + Tables: []string{tableName}, + }, readCh); err != nil { t.Fatal(err) } close(readCh) diff --git a/serve/plugin.go b/serve/plugin.go index 483312b871..f64d0ba1ec 100644 --- a/serve/plugin.go +++ b/serve/plugin.go @@ -159,8 +159,8 @@ func (s *PluginServe) newCmdPluginServe() *cobra.Command { ) s.plugin.SetLogger(logger) pbv3.RegisterPluginServer(grpcServer, &serversv3.Server{ - Plugin: s.plugin, - Logger: logger, + Plugin: s.plugin, + Logger: logger, NoSentry: noSentry, }) if s.destinationV0V1Server { @@ -177,7 +177,7 @@ func (s *PluginServe) newCmdPluginServe() *cobra.Command { Versions: []string{"v0", "v1", "v2", "v3"}, }) pbdiscoveryv1.RegisterDiscoveryServer(grpcServer, &discoveryServerV1.Server{ - Versions: []uint64{0,1,2,3}, + Versions: []uint64{0, 1, 2, 3}, }) version := s.plugin.Version() diff --git a/serve/state_v3_test.go b/serve/state_v3_test.go index d1442c2e88..f75d53353b 100644 --- a/serve/state_v3_test.go +++ b/serve/state_v3_test.go @@ -54,4 +54,4 @@ func TestStateV3(t *testing.T) { if serverErr != nil { t.Fatal(serverErr) } -} \ No newline at end of file +} diff --git a/state/state.go b/state/state.go index e92423df3c..55f070704e 100644 --- a/state/state.go +++ b/state/state.go @@ -5,4 +5,4 @@ import "context" type Client interface { SetKey(ctx context.Context, key string, value string) error GetKey(ctx context.Context, key string) (string, error) -} \ No newline at end of file +} From 88ccb18b1d9b49b54cdd1ab9c45ed8fe15ba0d9d Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Sun, 4 Jun 2023 22:31:07 +0300 Subject: [PATCH 060/125] more work --- plugin/plugin_reader.go | 34 ---------------------------------- 1 file changed, 34 deletions(-) diff --git a/plugin/plugin_reader.go b/plugin/plugin_reader.go index a01858eace..d02ca6bc75 100644 --- a/plugin/plugin_reader.go +++ b/plugin/plugin_reader.go @@ -11,21 +11,6 @@ import ( "github.com/rs/zerolog" ) -type Operation int - -const ( - OperationEqual Operation = iota - OperationNotEqual - OperationGreaterThan - OperationLessThan -) - -type WhereClause struct { - ColumnName string - Operation Operation - Value string -} - type SyncOptions struct { Tables []string SkipTables []string @@ -80,25 +65,6 @@ func (p *Plugin) DynamicTables() schema.Tables { return p.sessionTables } -// func (p *Plugin) readAll(ctx context.Context, table *schema.Table, sourceName string) ([]arrow.Record, error) { -// var readErr error -// ch := make(chan arrow.Record) -// go func() { -// defer close(ch) -// readErr = p.Read(ctx, table, sourceName, ch) -// }() -// // nolint:prealloc -// var resources []arrow.Record -// for resource := range ch { -// resources = append(resources, resource) -// } -// return resources, readErr -// } - -// func (p *Plugin) Read(ctx context.Context, table *schema.Table, sourceName string, res chan<- arrow.Record) error { -// return p.client.Read(ctx, table, sourceName, res) -// } - func (p *Plugin) syncAll(ctx context.Context, options SyncOptions) ([]arrow.Record, error) { var err error ch := make(chan arrow.Record) From 6846b97c73f32a1fd282598a22f7593939ce58ed Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Mon, 5 Jun 2023 18:17:16 +0300 Subject: [PATCH 061/125] more wip --- internal/servers/plugin/v3/plugin.go | 24 -- plugin/options.go | 6 - plugin/plugin.go | 2 - plugin/plugin_managed_sync.go | 44 --- plugin/plugin_reader.go | 11 +- plugin/scheduler.go | 163 ----------- scheduler/metrics.go | 125 ++++++++ scheduler/metrics_test.go | 37 +++ .../plugin_managed_source_test.go.backup | 2 +- scheduler/scheduler.go | 275 ++++++++++++++++++ {plugin => scheduler}/scheduler_dfs.go | 86 +++--- .../scheduler_round_robin.go | 48 +-- .../scheduler_round_robin_test.go | 9 +- 13 files changed, 515 insertions(+), 317 deletions(-) delete mode 100644 plugin/plugin_managed_sync.go delete mode 100644 plugin/scheduler.go create mode 100644 scheduler/metrics.go create mode 100644 scheduler/metrics_test.go rename plugin/plugin_managed_source_test.go => scheduler/plugin_managed_source_test.go.backup (99%) create mode 100644 scheduler/scheduler.go rename {plugin => scheduler}/scheduler_dfs.go (69%) rename {plugin => scheduler}/scheduler_round_robin.go (64%) rename {plugin => scheduler}/scheduler_round_robin_test.go (94%) diff --git a/internal/servers/plugin/v3/plugin.go b/internal/servers/plugin/v3/plugin.go index 53b02b1cd5..fa432af917 100644 --- a/internal/servers/plugin/v3/plugin.go +++ b/internal/servers/plugin/v3/plugin.go @@ -3,7 +3,6 @@ package plugin import ( "bytes" "context" - "encoding/json" "errors" "fmt" "io" @@ -163,29 +162,6 @@ func (s *Server) Sync(req *pb.Sync_Request, stream pb.Plugin_SyncServer) error { return syncErr } -func (s *Server) GetMetrics(context.Context, *pb.GetMetrics_Request) (*pb.GetMetrics_Response, error) { - // Aggregate metrics before sending to keep response size small. - // Temporary fix for https://github.com/cloudquery/cloudquery/issues/3962 - m := s.Plugin.Metrics() - agg := &plugin.TableClientMetrics{} - for _, table := range m.TableClient { - for _, tableClient := range table { - agg.Resources += tableClient.Resources - agg.Errors += tableClient.Errors - agg.Panics += tableClient.Panics - } - } - b, err := json.Marshal(&plugin.Metrics{ - TableClient: map[string]map[string]*plugin.TableClientMetrics{"": {"": agg}}, - }) - if err != nil { - return nil, fmt.Errorf("failed to marshal source metrics: %w", err) - } - return &pb.GetMetrics_Response{ - Metrics: b, - }, nil -} - func (s *Server) Migrate(ctx context.Context, req *pb.Migrate_Request) (*pb.Migrate_Response, error) { schemas, err := schema.NewSchemasFromBytes(req.Tables) if err != nil { diff --git a/plugin/options.go b/plugin/options.go index 11841fa53b..66a13b69f7 100644 --- a/plugin/options.go +++ b/plugin/options.go @@ -113,12 +113,6 @@ func WithNoInternalColumns() Option { } } -func WithUnmanagedSync() Option { - return func(p *Plugin) { - p.unmanagedSync = true - } -} - // WithTitleTransformer allows the plugin to control how table names get turned into titles for the // generated documentation. func WithTitleTransformer(t func(*schema.Table) string) Option { diff --git a/plugin/plugin.go b/plugin/plugin.go index ec5d00b1f1..44a8731fec 100644 --- a/plugin/plugin.go +++ b/plugin/plugin.go @@ -26,13 +26,11 @@ type ManagedSyncClient interface { } type Client interface { - NewManagedSyncClient(ctx context.Context, options SyncOptions) (ManagedSyncClient, error) Sync(ctx context.Context, options SyncOptions, res chan<- arrow.Record) error Migrate(ctx context.Context, tables schema.Tables, migrateMode MigrateMode) error WriteTableBatch(ctx context.Context, table *schema.Table, writeMode WriteMode, data []arrow.Record) error Write(ctx context.Context, tables schema.Tables, writeMode WriteMode, res <-chan arrow.Record) error DeleteStale(ctx context.Context, tables schema.Tables, sourceName string, syncTime time.Time) error - // Read(ctx context.Context, table *schema.Table, sourceName string, res chan<- arrow.Record) error Close(ctx context.Context) error } diff --git a/plugin/plugin_managed_sync.go b/plugin/plugin_managed_sync.go deleted file mode 100644 index 8d4b113ebb..0000000000 --- a/plugin/plugin_managed_sync.go +++ /dev/null @@ -1,44 +0,0 @@ -package plugin - -import ( - "context" - "fmt" - - "github.com/apache/arrow/go/v13/arrow" - "github.com/apache/arrow/go/v13/arrow/array" - "github.com/apache/arrow/go/v13/arrow/memory" - "github.com/cloudquery/plugin-sdk/v4/scalar" - "github.com/cloudquery/plugin-sdk/v4/schema" -) - -func (p *Plugin) managedSync(ctx context.Context, options SyncOptions, res chan<- arrow.Record) error { - if len(p.sessionTables) == 0 { - return fmt.Errorf("no tables to sync - please check your spec 'tables' and 'skip_tables' settings") - } - - managedClient, err := p.client.NewManagedSyncClient(ctx, options) - if err != nil { - return fmt.Errorf("failed to create managed sync client: %w", err) - } - - resources := make(chan *schema.Resource) - go func() { - defer close(resources) - switch options.Scheduler { - case SchedulerDFS: - p.syncDfs(ctx, options, managedClient, p.sessionTables, resources) - case SchedulerRoundRobin: - p.syncRoundRobin(ctx, options, managedClient, p.sessionTables, resources) - default: - panic(fmt.Errorf("unknown scheduler %s", options.Scheduler)) - } - }() - for resource := range resources { - vector := resource.GetValues() - bldr := array.NewRecordBuilder(memory.DefaultAllocator, resource.Table.ToArrowSchema()) - scalar.AppendToRecordBuilder(bldr, vector) - rec := bldr.NewRecord() - res <- rec - } - return nil -} diff --git a/plugin/plugin_reader.go b/plugin/plugin_reader.go index d02ca6bc75..e040976da0 100644 --- a/plugin/plugin_reader.go +++ b/plugin/plugin_reader.go @@ -25,7 +25,6 @@ type SyncOptions struct { } type ReadOnlyClient interface { - NewManagedSyncClient(ctx context.Context, options SyncOptions) (ManagedSyncClient, error) Sync(ctx context.Context, options SyncOptions, res chan<- arrow.Record) error Read(ctx context.Context, table *schema.Table, sourceName string, res chan<- arrow.Record) error Close(ctx context.Context) error @@ -89,14 +88,8 @@ func (p *Plugin) Sync(ctx context.Context, options SyncOptions, res chan<- arrow p.syncTime = options.SyncTime startTime := time.Now() - if p.unmanagedSync { - if err := p.client.Sync(ctx, options, res); err != nil { - return fmt.Errorf("failed to sync unmanaged client: %w", err) - } - } else { - if err := p.managedSync(ctx, options, res); err != nil { - return fmt.Errorf("failed to sync managed client: %w", err) - } + if err := p.client.Sync(ctx, options, res); err != nil { + return fmt.Errorf("failed to sync unmanaged client: %w", err) } p.logger.Info().Uint64("resources", p.metrics.TotalResources()).Uint64("errors", p.metrics.TotalErrors()).Uint64("panics", p.metrics.TotalPanics()).TimeDiff("duration", time.Now(), startTime).Msg("sync finished") diff --git a/plugin/scheduler.go b/plugin/scheduler.go deleted file mode 100644 index c00ed2c8a9..0000000000 --- a/plugin/scheduler.go +++ /dev/null @@ -1,163 +0,0 @@ -package plugin - -import ( - "context" - "errors" - "fmt" - "runtime/debug" - "sync" - "sync/atomic" - "time" - - "github.com/cloudquery/plugin-sdk/v4/schema" - "github.com/getsentry/sentry-go" - "github.com/rs/zerolog" - "github.com/thoas/go-funk" -) - -const ( - minTableConcurrency = 1 - minResourceConcurrency = 100 -) - -const periodicMetricLoggerInterval = 30 * time.Second - -func (p *Plugin) logTablesMetrics(tables schema.Tables, client schema.ClientMeta) { - clientName := client.ID() - for _, table := range tables { - metrics := p.metrics.TableClient[table.Name][clientName] - p.logger.Info().Str("table", table.Name).Str("client", clientName).Uint64("resources", metrics.Resources).Uint64("errors", metrics.Errors).Msg("table sync finished") - p.logTablesMetrics(table.Relations, client) - } -} - -func (p *Plugin) resolveResource(ctx context.Context, table *schema.Table, client schema.ClientMeta, parent *schema.Resource, item any) *schema.Resource { - var validationErr *schema.ValidationError - ctx, cancel := context.WithTimeout(ctx, 10*time.Minute) - defer cancel() - resource := schema.NewResourceData(table, parent, item) - objectStartTime := time.Now() - clientID := client.ID() - tableMetrics := p.metrics.TableClient[table.Name][clientID] - logger := p.logger.With().Str("table", table.Name).Str("client", clientID).Logger() - defer func() { - if err := recover(); err != nil { - stack := fmt.Sprintf("%s\n%s", err, string(debug.Stack())) - logger.Error().Interface("error", err).TimeDiff("duration", time.Now(), objectStartTime).Str("stack", stack).Msg("resource resolver finished with panic") - atomic.AddUint64(&tableMetrics.Panics, 1) - sentry.WithScope(func(scope *sentry.Scope) { - scope.SetTag("table", table.Name) - sentry.CurrentHub().CaptureMessage(stack) - }) - } - }() - if table.PreResourceResolver != nil { - if err := table.PreResourceResolver(ctx, client, resource); err != nil { - logger.Error().Err(err).Msg("pre resource resolver failed") - atomic.AddUint64(&tableMetrics.Errors, 1) - if errors.As(err, &validationErr) { - sentry.WithScope(func(scope *sentry.Scope) { - scope.SetTag("table", table.Name) - sentry.CurrentHub().CaptureMessage(validationErr.MaskedError()) - }) - } - return nil - } - } - - for _, c := range table.Columns { - p.resolveColumn(ctx, logger, tableMetrics, client, resource, c) - } - - if table.PostResourceResolver != nil { - if err := table.PostResourceResolver(ctx, client, resource); err != nil { - logger.Error().Stack().Err(err).Msg("post resource resolver finished with error") - atomic.AddUint64(&tableMetrics.Errors, 1) - if errors.As(err, &validationErr) { - sentry.WithScope(func(scope *sentry.Scope) { - scope.SetTag("table", table.Name) - sentry.CurrentHub().CaptureMessage(validationErr.MaskedError()) - }) - } - } - } - atomic.AddUint64(&tableMetrics.Resources, 1) - return resource -} - -func (p *Plugin) resolveColumn(ctx context.Context, logger zerolog.Logger, tableMetrics *TableClientMetrics, client schema.ClientMeta, resource *schema.Resource, c schema.Column) { - var validationErr *schema.ValidationError - columnStartTime := time.Now() - defer func() { - if err := recover(); err != nil { - stack := fmt.Sprintf("%s\n%s", err, string(debug.Stack())) - logger.Error().Str("column", c.Name).Interface("error", err).TimeDiff("duration", time.Now(), columnStartTime).Str("stack", stack).Msg("column resolver finished with panic") - atomic.AddUint64(&tableMetrics.Panics, 1) - sentry.WithScope(func(scope *sentry.Scope) { - scope.SetTag("table", resource.Table.Name) - scope.SetTag("column", c.Name) - sentry.CurrentHub().CaptureMessage(stack) - }) - } - }() - - if c.Resolver != nil { - if err := c.Resolver(ctx, client, resource, c); err != nil { - logger.Error().Err(err).Msg("column resolver finished with error") - atomic.AddUint64(&tableMetrics.Errors, 1) - if errors.As(err, &validationErr) { - sentry.WithScope(func(scope *sentry.Scope) { - scope.SetTag("table", resource.Table.Name) - scope.SetTag("column", c.Name) - sentry.CurrentHub().CaptureMessage(validationErr.MaskedError()) - }) - } - } - } else { - // base use case: try to get column with CamelCase name - v := funk.Get(resource.GetItem(), p.caser.ToPascal(c.Name), funk.WithAllowZero()) - if v != nil { - err := resource.Set(c.Name, v) - if err != nil { - logger.Error().Err(err).Msg("column resolver finished with error") - atomic.AddUint64(&tableMetrics.Errors, 1) - if errors.As(err, &validationErr) { - sentry.WithScope(func(scope *sentry.Scope) { - scope.SetTag("table", resource.Table.Name) - scope.SetTag("column", c.Name) - sentry.CurrentHub().CaptureMessage(validationErr.MaskedError()) - }) - } - } - } - } -} - -func (p *Plugin) periodicMetricLogger(ctx context.Context, wg *sync.WaitGroup) { - defer wg.Done() - - ticker := time.NewTicker(periodicMetricLoggerInterval) - defer ticker.Stop() - - for { - select { - case <-ctx.Done(): - return - case <-ticker.C: - p.logger.Info(). - Uint64("total_resources", p.metrics.TotalResourcesAtomic()). - Uint64("total_errors", p.metrics.TotalErrorsAtomic()). - Uint64("total_panics", p.metrics.TotalPanicsAtomic()). - Msg("Sync in progress") - } - } -} - -// unparam's suggestion to remove the second parameter is not good advice here. -// nolint:unparam -func max(a, b uint64) uint64 { - if a > b { - return a - } - return b -} diff --git a/scheduler/metrics.go b/scheduler/metrics.go new file mode 100644 index 0000000000..372965ba93 --- /dev/null +++ b/scheduler/metrics.go @@ -0,0 +1,125 @@ +package scheduler + +import ( + "sync/atomic" + "time" + + "github.com/cloudquery/plugin-sdk/v4/schema" +) + +type Metrics struct { + TableClient map[string]map[string]*TableClientMetrics +} + +type TableClientMetrics struct { + Resources uint64 + Errors uint64 + Panics uint64 + StartTime time.Time + EndTime time.Time +} + +func (s *TableClientMetrics) Equal(other *TableClientMetrics) bool { + return s.Resources == other.Resources && s.Errors == other.Errors && s.Panics == other.Panics +} + +// Equal compares to stats. Mostly useful in testing +func (s *Metrics) Equal(other *Metrics) bool { + for table, clientStats := range s.TableClient { + for client, stats := range clientStats { + if _, ok := other.TableClient[table]; !ok { + return false + } + if _, ok := other.TableClient[table][client]; !ok { + return false + } + if !stats.Equal(other.TableClient[table][client]) { + return false + } + } + } + for table, clientStats := range other.TableClient { + for client, stats := range clientStats { + if _, ok := s.TableClient[table]; !ok { + return false + } + if _, ok := s.TableClient[table][client]; !ok { + return false + } + if !stats.Equal(s.TableClient[table][client]) { + return false + } + } + } + return true +} + +func (s *Metrics) initWithClients(table *schema.Table, clients []schema.ClientMeta) { + s.TableClient[table.Name] = make(map[string]*TableClientMetrics, len(clients)) + for _, client := range clients { + s.TableClient[table.Name][client.ID()] = &TableClientMetrics{} + } + for _, relation := range table.Relations { + s.initWithClients(relation, clients) + } +} + +func (s *Metrics) TotalErrors() uint64 { + var total uint64 + for _, clientMetrics := range s.TableClient { + for _, metrics := range clientMetrics { + total += metrics.Errors + } + } + return total +} + +func (s *Metrics) TotalErrorsAtomic() uint64 { + var total uint64 + for _, clientMetrics := range s.TableClient { + for _, metrics := range clientMetrics { + total += atomic.LoadUint64(&metrics.Errors) + } + } + return total +} + +func (s *Metrics) TotalPanics() uint64 { + var total uint64 + for _, clientMetrics := range s.TableClient { + for _, metrics := range clientMetrics { + total += metrics.Panics + } + } + return total +} + +func (s *Metrics) TotalPanicsAtomic() uint64 { + var total uint64 + for _, clientMetrics := range s.TableClient { + for _, metrics := range clientMetrics { + total += atomic.LoadUint64(&metrics.Panics) + } + } + return total +} + +func (s *Metrics) TotalResources() uint64 { + var total uint64 + for _, clientMetrics := range s.TableClient { + for _, metrics := range clientMetrics { + total += metrics.Resources + } + } + return total +} + +func (s *Metrics) TotalResourcesAtomic() uint64 { + var total uint64 + for _, clientMetrics := range s.TableClient { + for _, metrics := range clientMetrics { + total += atomic.LoadUint64(&metrics.Resources) + } + } + return total +} diff --git a/scheduler/metrics_test.go b/scheduler/metrics_test.go new file mode 100644 index 0000000000..1bc11daa58 --- /dev/null +++ b/scheduler/metrics_test.go @@ -0,0 +1,37 @@ +package scheduler + +import "testing" + +func TestMetrics(t *testing.T) { + s := &Metrics{ + TableClient: make(map[string]map[string]*TableClientMetrics), + } + s.TableClient["test_table"] = make(map[string]*TableClientMetrics) + s.TableClient["test_table"]["testExecutionClient"] = &TableClientMetrics{ + Resources: 1, + Errors: 2, + Panics: 3, + } + if s.TotalResources() != 1 { + t.Fatal("expected 1 resource") + } + if s.TotalErrors() != 2 { + t.Fatal("expected 2 error") + } + if s.TotalPanics() != 3 { + t.Fatal("expected 3 panics") + } + + other := &Metrics{ + TableClient: make(map[string]map[string]*TableClientMetrics), + } + other.TableClient["test_table"] = make(map[string]*TableClientMetrics) + other.TableClient["test_table"]["testExecutionClient"] = &TableClientMetrics{ + Resources: 1, + Errors: 2, + Panics: 3, + } + if !s.Equal(other) { + t.Fatal("expected metrics to be equal") + } +} diff --git a/plugin/plugin_managed_source_test.go b/scheduler/plugin_managed_source_test.go.backup similarity index 99% rename from plugin/plugin_managed_source_test.go rename to scheduler/plugin_managed_source_test.go.backup index c2071cc977..e0a006a4ca 100644 --- a/plugin/plugin_managed_source_test.go +++ b/scheduler/plugin_managed_source_test.go.backup @@ -1,4 +1,4 @@ -package plugin +package scheduler import ( "context" diff --git a/scheduler/scheduler.go b/scheduler/scheduler.go new file mode 100644 index 0000000000..228582ef7a --- /dev/null +++ b/scheduler/scheduler.go @@ -0,0 +1,275 @@ +package scheduler + +import ( + "bytes" + "context" + "errors" + "fmt" + "runtime/debug" + "sync/atomic" + "time" + + "github.com/apache/arrow/go/v13/arrow" + "github.com/apache/arrow/go/v13/arrow/array" + "github.com/apache/arrow/go/v13/arrow/memory" + "github.com/cloudquery/plugin-sdk/v4/caser" + "github.com/cloudquery/plugin-sdk/v4/scalar" + "github.com/cloudquery/plugin-sdk/v4/schema" + "github.com/getsentry/sentry-go" + "github.com/rs/zerolog" + "github.com/thoas/go-funk" + "golang.org/x/sync/semaphore" +) + +const ( + minTableConcurrency = 1 + minResourceConcurrency = 100 + defaultConcurrency = 200000 +) + +type SchedulerStrategy int + +const ( + SchedulerDFS SchedulerStrategy = iota + SchedulerRoundRobin +) + +var AllSchedulers = Schedulers{SchedulerDFS, SchedulerRoundRobin} +var AllSchedulerNames = [...]string{ + SchedulerDFS: "dfs", + SchedulerRoundRobin: "round-robin", +} + +type Schedulers []SchedulerStrategy + +func (s Schedulers) String() string { + var buffer bytes.Buffer + for i, scheduler := range s { + if i > 0 { + buffer.WriteString(", ") + } + buffer.WriteString(scheduler.String()) + } + return buffer.String() +} + +func (s SchedulerStrategy) String() string { + return AllSchedulerNames[s] +} + +const periodicMetricLoggerInterval = 30 * time.Second + +type Option func(*Scheduler) + +func WithLogger(logger zerolog.Logger) Option { + return func(s *Scheduler) { + s.logger = logger + } +} + +func WithDeterministicCQId(deterministicCQId bool) Option { + return func(s *Scheduler) { + s.deterministicCQId = deterministicCQId + } +} + +func WithConcurrency(concurrency uint64) Option { + return func(s *Scheduler) { + s.concurrency = concurrency + } +} + +type Scheduler struct { + tables schema.Tables + client schema.ClientMeta + caser *caser.Caser + strategy SchedulerStrategy + // status sync metrics + metrics *Metrics + maxDepth uint64 + // resourceSem is a semaphore that limits the number of concurrent resources being fetched + resourceSem *semaphore.Weighted + // tableSem is a semaphore that limits the number of concurrent tables being fetched + tableSems []*semaphore.Weighted + // Logger to call, this logger is passed to the serve.Serve Client, if not defined Serve will create one instead. + logger zerolog.Logger + deterministicCQId bool + concurrency uint64 +} + +func NewScheduler(tables schema.Tables, client schema.ClientMeta, opts ...Option) *Scheduler { + s := Scheduler{ + tables: tables, + client: client, + metrics: &Metrics{TableClient: make(map[string]map[string]*TableClientMetrics)}, + caser: caser.New(), + concurrency: defaultConcurrency, + } + for _, opt := range opts { + opt(&s) + } + return &s +} + +func (s *Scheduler) Sync(ctx context.Context, res chan<- arrow.Record) error { + resources := make(chan *schema.Resource) + go func() { + defer close(resources) + switch s.strategy { + case SchedulerDFS: + s.syncDfs(ctx, resources) + case SchedulerRoundRobin: + s.syncRoundRobin(ctx, resources) + default: + panic(fmt.Errorf("unknown scheduler %s", s.strategy)) + } + }() + for resource := range resources { + vector := resource.GetValues() + bldr := array.NewRecordBuilder(memory.DefaultAllocator, resource.Table.ToArrowSchema()) + scalar.AppendToRecordBuilder(bldr, vector) + rec := bldr.NewRecord() + res <- rec + } + return nil +} + +// func (p *Scheduler) logTablesMetrics(tables schema.Tables, client schema.ClientMeta) { +// clientName := client.ID() +// for _, table := range tables { +// metrics := p.metrics.TableClient[table.Name][clientName] +// p.logger.Info().Str("table", table.Name).Str("client", clientName).Uint64("resources", metrics.Resources).Uint64("errors", metrics.Errors).Msg("table sync finished") +// p.logTablesMetrics(table.Relations, client) +// } +// } + +func (p *Scheduler) resolveResource(ctx context.Context, table *schema.Table, client schema.ClientMeta, parent *schema.Resource, item any) *schema.Resource { + var validationErr *schema.ValidationError + ctx, cancel := context.WithTimeout(ctx, 10*time.Minute) + defer cancel() + resource := schema.NewResourceData(table, parent, item) + objectStartTime := time.Now() + clientID := client.ID() + tableMetrics := p.metrics.TableClient[table.Name][clientID] + logger := p.logger.With().Str("table", table.Name).Str("client", clientID).Logger() + defer func() { + if err := recover(); err != nil { + stack := fmt.Sprintf("%s\n%s", err, string(debug.Stack())) + logger.Error().Interface("error", err).TimeDiff("duration", time.Now(), objectStartTime).Str("stack", stack).Msg("resource resolver finished with panic") + atomic.AddUint64(&tableMetrics.Panics, 1) + sentry.WithScope(func(scope *sentry.Scope) { + scope.SetTag("table", table.Name) + sentry.CurrentHub().CaptureMessage(stack) + }) + } + }() + if table.PreResourceResolver != nil { + if err := table.PreResourceResolver(ctx, client, resource); err != nil { + logger.Error().Err(err).Msg("pre resource resolver failed") + atomic.AddUint64(&tableMetrics.Errors, 1) + if errors.As(err, &validationErr) { + sentry.WithScope(func(scope *sentry.Scope) { + scope.SetTag("table", table.Name) + sentry.CurrentHub().CaptureMessage(validationErr.MaskedError()) + }) + } + return nil + } + } + + for _, c := range table.Columns { + p.resolveColumn(ctx, logger, tableMetrics, client, resource, c) + } + + if table.PostResourceResolver != nil { + if err := table.PostResourceResolver(ctx, client, resource); err != nil { + logger.Error().Stack().Err(err).Msg("post resource resolver finished with error") + atomic.AddUint64(&tableMetrics.Errors, 1) + if errors.As(err, &validationErr) { + sentry.WithScope(func(scope *sentry.Scope) { + scope.SetTag("table", table.Name) + sentry.CurrentHub().CaptureMessage(validationErr.MaskedError()) + }) + } + } + } + atomic.AddUint64(&tableMetrics.Resources, 1) + return resource +} + +func (p *Scheduler) resolveColumn(ctx context.Context, logger zerolog.Logger, tableMetrics *TableClientMetrics, client schema.ClientMeta, resource *schema.Resource, c schema.Column) { + var validationErr *schema.ValidationError + columnStartTime := time.Now() + defer func() { + if err := recover(); err != nil { + stack := fmt.Sprintf("%s\n%s", err, string(debug.Stack())) + logger.Error().Str("column", c.Name).Interface("error", err).TimeDiff("duration", time.Now(), columnStartTime).Str("stack", stack).Msg("column resolver finished with panic") + atomic.AddUint64(&tableMetrics.Panics, 1) + sentry.WithScope(func(scope *sentry.Scope) { + scope.SetTag("table", resource.Table.Name) + scope.SetTag("column", c.Name) + sentry.CurrentHub().CaptureMessage(stack) + }) + } + }() + + if c.Resolver != nil { + if err := c.Resolver(ctx, client, resource, c); err != nil { + logger.Error().Err(err).Msg("column resolver finished with error") + atomic.AddUint64(&tableMetrics.Errors, 1) + if errors.As(err, &validationErr) { + sentry.WithScope(func(scope *sentry.Scope) { + scope.SetTag("table", resource.Table.Name) + scope.SetTag("column", c.Name) + sentry.CurrentHub().CaptureMessage(validationErr.MaskedError()) + }) + } + } + } else { + // base use case: try to get column with CamelCase name + v := funk.Get(resource.GetItem(), p.caser.ToPascal(c.Name), funk.WithAllowZero()) + if v != nil { + err := resource.Set(c.Name, v) + if err != nil { + logger.Error().Err(err).Msg("column resolver finished with error") + atomic.AddUint64(&tableMetrics.Errors, 1) + if errors.As(err, &validationErr) { + sentry.WithScope(func(scope *sentry.Scope) { + scope.SetTag("table", resource.Table.Name) + scope.SetTag("column", c.Name) + sentry.CurrentHub().CaptureMessage(validationErr.MaskedError()) + }) + } + } + } + } +} + +// func (p *Scheduler) periodicMetricLogger(ctx context.Context, wg *sync.WaitGroup) { +// defer wg.Done() + +// ticker := time.NewTicker(periodicMetricLoggerInterval) +// defer ticker.Stop() + +// for { +// select { +// case <-ctx.Done(): +// return +// case <-ticker.C: +// p.logger.Info(). +// Uint64("total_resources", p.metrics.TotalResourcesAtomic()). +// Uint64("total_errors", p.metrics.TotalErrorsAtomic()). +// Uint64("total_panics", p.metrics.TotalPanicsAtomic()). +// Msg("Sync in progress") +// } +// } +// } + +// unparam's suggestion to remove the second parameter is not good advice here. +// nolint:unparam +func max(a, b uint64) uint64 { + if a > b { + return a + } + return b +} diff --git a/plugin/scheduler_dfs.go b/scheduler/scheduler_dfs.go similarity index 69% rename from plugin/scheduler_dfs.go rename to scheduler/scheduler_dfs.go index bd87c50aeb..7789dd34e4 100644 --- a/plugin/scheduler_dfs.go +++ b/scheduler/scheduler_dfs.go @@ -1,4 +1,4 @@ -package plugin +package scheduler import ( "context" @@ -14,27 +14,27 @@ import ( "golang.org/x/sync/semaphore" ) -func (p *Plugin) syncDfs(ctx context.Context, options SyncOptions, client ManagedSyncClient, tables schema.Tables, resolvedResources chan<- *schema.Resource) { +func (s *Scheduler) syncDfs(ctx context.Context, resolvedResources chan<- *schema.Resource) { // This is very similar to the concurrent web crawler problem with some minor changes. // We are using DFS to make sure memory usage is capped at O(h) where h is the height of the tree. - tableConcurrency := max(uint64(options.Concurrency/minResourceConcurrency), minTableConcurrency) + tableConcurrency := max(uint64(s.concurrency/minResourceConcurrency), minTableConcurrency) resourceConcurrency := tableConcurrency * minResourceConcurrency - p.tableSems = make([]*semaphore.Weighted, p.maxDepth) - for i := uint64(0); i < p.maxDepth; i++ { - p.tableSems[i] = semaphore.NewWeighted(int64(tableConcurrency)) + s.tableSems = make([]*semaphore.Weighted, s.maxDepth) + for i := uint64(0); i < s.maxDepth; i++ { + s.tableSems[i] = semaphore.NewWeighted(int64(tableConcurrency)) // reduce table concurrency logarithmically for every depth level tableConcurrency = max(tableConcurrency/2, minTableConcurrency) } - p.resourceSem = semaphore.NewWeighted(int64(resourceConcurrency)) + s.resourceSem = semaphore.NewWeighted(int64(resourceConcurrency)) // we have this because plugins can return sometimes clients in a random way which will cause // differences between this run and the next one. - preInitialisedClients := make([][]schema.ClientMeta, len(tables)) - for i, table := range tables { - clients := []schema.ClientMeta{client.(schema.ClientMeta)} + preInitialisedClients := make([][]schema.ClientMeta, len(s.tables)) + for i, table := range s.tables { + clients := []schema.ClientMeta{s.client} if table.Multiplex != nil { - clients = table.Multiplex(client.(schema.ClientMeta)) + clients = table.Multiplex(s.client) } // Detect duplicate clients while multiplexing seenClients := make(map[string]bool) @@ -46,44 +46,44 @@ func (p *Plugin) syncDfs(ctx context.Context, options SyncOptions, client Manage scope.SetTag("table", table.Name) sentry.CurrentHub().CaptureMessage("duplicate client ID in " + table.Name) }) - p.logger.Warn().Str("client", c.ID()).Str("table", table.Name).Msg("multiplex returned duplicate client") + s.logger.Warn().Str("client", c.ID()).Str("table", table.Name).Msg("multiplex returned duplicate client") } } preInitialisedClients[i] = clients // we do this here to avoid locks so we initial the metrics structure once in the main goroutines // and then we can just read from it in the other goroutines concurrently given we are not writing to it. - p.metrics.initWithClients(table, clients) + s.metrics.initWithClients(table, clients) } // We start a goroutine that logs the metrics periodically. // It needs its own waitgroup - var logWg sync.WaitGroup - logWg.Add(1) + // var logWg sync.WaitGroup + // logWg.Add(1) - logCtx, logCancel := context.WithCancel(ctx) - go p.periodicMetricLogger(logCtx, &logWg) + // logCtx, logCancel := context.WithCancel(ctx) + // go s.periodicMetricLogger(logCtx, &logWg) var wg sync.WaitGroup - for i, table := range tables { + for i, table := range s.tables { table := table clients := preInitialisedClients[i] for _, client := range clients { client := client - if err := p.tableSems[0].Acquire(ctx, 1); err != nil { + if err := s.tableSems[0].Acquire(ctx, 1); err != nil { // This means context was cancelled wg.Wait() // gracefully shut down the logger goroutine - logCancel() - logWg.Wait() + // logCancel() + // logWg.Wait() return } wg.Add(1) go func() { defer wg.Done() - defer p.tableSems[0].Release(1) + defer s.tableSems[0].Release(1) // not checking for error here as nothing much todo. // the error is logged and this happens when context is cancelled - p.resolveTableDfs(ctx, table, client, nil, resolvedResources, 1) + s.resolveTableDfs(ctx, table, client, nil, resolvedResources, 1) }() } } @@ -92,19 +92,19 @@ func (p *Plugin) syncDfs(ctx context.Context, options SyncOptions, client Manage wg.Wait() // gracefully shut down the logger goroutine - logCancel() - logWg.Wait() + // logCancel() + // logWg.Wait() } -func (p *Plugin) resolveTableDfs(ctx context.Context, table *schema.Table, client schema.ClientMeta, parent *schema.Resource, resolvedResources chan<- *schema.Resource, depth int) { +func (s *Scheduler) resolveTableDfs(ctx context.Context, table *schema.Table, client schema.ClientMeta, parent *schema.Resource, resolvedResources chan<- *schema.Resource, depth int) { var validationErr *schema.ValidationError clientName := client.ID() - logger := p.logger.With().Str("table", table.Name).Str("client", clientName).Logger() + logger := s.logger.With().Str("table", table.Name).Str("client", clientName).Logger() if parent == nil { // Log only for root tables, otherwise we spam too much. logger.Info().Msg("top level table resolver started") } - tableMetrics := p.metrics.TableClient[table.Name][clientName] + tableMetrics := s.metrics.TableClient[table.Name][clientName] res := make(chan any) go func() { @@ -134,17 +134,17 @@ func (p *Plugin) resolveTableDfs(ctx context.Context, table *schema.Table, clien }() for r := range res { - p.resolveResourcesDfs(ctx, table, client, parent, r, resolvedResources, depth) + s.resolveResourcesDfs(ctx, table, client, parent, r, resolvedResources, depth) } // we don't need any waitgroups here because we are waiting for the channel to close if parent == nil { // Log only for root tables and relations only after resolving is done, otherwise we spam per object instead of per table. logger.Info().Uint64("resources", tableMetrics.Resources).Uint64("errors", tableMetrics.Errors).Msg("table sync finished") - p.logTablesMetrics(table.Relations, client) + // s.logTablesMetrics(table.Relations, client) } } -func (p *Plugin) resolveResourcesDfs(ctx context.Context, table *schema.Table, client schema.ClientMeta, parent *schema.Resource, resources any, resolvedResources chan<- *schema.Resource, depth int) { +func (s *Scheduler) resolveResourcesDfs(ctx context.Context, table *schema.Table, client schema.ClientMeta, parent *schema.Resource, resources any, resolvedResources chan<- *schema.Resource, depth int) { resourcesSlice := helpers.InterfaceSlice(resources) if len(resourcesSlice) == 0 { return @@ -156,25 +156,25 @@ func (p *Plugin) resolveResourcesDfs(ctx context.Context, table *schema.Table, c sentValidationErrors := sync.Map{} for i := range resourcesSlice { i := i - if err := p.resourceSem.Acquire(ctx, 1); err != nil { - p.logger.Warn().Err(err).Msg("failed to acquire semaphore. context cancelled") + if err := s.resourceSem.Acquire(ctx, 1); err != nil { + s.logger.Warn().Err(err).Msg("failed to acquire semaphore. context cancelled") wg.Wait() // we have to continue emptying the channel to exit gracefully return } wg.Add(1) go func() { - defer p.resourceSem.Release(1) + defer s.resourceSem.Release(1) defer wg.Done() //nolint:all - resolvedResource := p.resolveResource(ctx, table, client, parent, resourcesSlice[i]) + resolvedResource := s.resolveResource(ctx, table, client, parent, resourcesSlice[i]) if resolvedResource == nil { return } - if err := resolvedResource.CalculateCQID(p.deterministicCQId); err != nil { - tableMetrics := p.metrics.TableClient[table.Name][client.ID()] - p.logger.Error().Err(err).Str("table", table.Name).Str("client", client.ID()).Msg("resource resolver finished with primary key calculation error") + if err := resolvedResource.CalculateCQID(s.deterministicCQId); err != nil { + tableMetrics := s.metrics.TableClient[table.Name][client.ID()] + s.logger.Error().Err(err).Str("table", table.Name).Str("client", client.ID()).Msg("resource resolver finished with primary key calculation error") if _, found := sentValidationErrors.LoadOrStore(table.Name, struct{}{}); !found { // send resource validation errors to Sentry only once per table, // to avoid sending too many duplicate messages @@ -187,8 +187,8 @@ func (p *Plugin) resolveResourcesDfs(ctx context.Context, table *schema.Table, c return } if err := resolvedResource.Validate(); err != nil { - tableMetrics := p.metrics.TableClient[table.Name][client.ID()] - p.logger.Error().Err(err).Str("table", table.Name).Str("client", client.ID()).Msg("resource resolver finished with validation error") + tableMetrics := s.metrics.TableClient[table.Name][client.ID()] + s.logger.Error().Err(err).Str("table", table.Name).Str("client", client.ID()).Msg("resource resolver finished with validation error") if _, found := sentValidationErrors.LoadOrStore(table.Name, struct{}{}); !found { // send resource validation errors to Sentry only once per table, // to avoid sending too many duplicate messages @@ -212,7 +212,7 @@ func (p *Plugin) resolveResourcesDfs(ctx context.Context, table *schema.Table, c resolvedResources <- resource for _, relation := range resource.Table.Relations { relation := relation - if err := p.tableSems[depth].Acquire(ctx, 1); err != nil { + if err := s.tableSems[depth].Acquire(ctx, 1); err != nil { // This means context was cancelled wg.Wait() return @@ -220,8 +220,8 @@ func (p *Plugin) resolveResourcesDfs(ctx context.Context, table *schema.Table, c wg.Add(1) go func() { defer wg.Done() - defer p.tableSems[depth].Release(1) - p.resolveTableDfs(ctx, relation, client, resource, resolvedResources, depth+1) + defer s.tableSems[depth].Release(1) + s.resolveTableDfs(ctx, relation, client, resource, resolvedResources, depth+1) }() } } diff --git a/plugin/scheduler_round_robin.go b/scheduler/scheduler_round_robin.go similarity index 64% rename from plugin/scheduler_round_robin.go rename to scheduler/scheduler_round_robin.go index a0be17938d..104e8f4514 100644 --- a/plugin/scheduler_round_robin.go +++ b/scheduler/scheduler_round_robin.go @@ -1,4 +1,4 @@ -package plugin +package scheduler import ( "context" @@ -13,63 +13,63 @@ type tableClient struct { client schema.ClientMeta } -func (p *Plugin) syncRoundRobin(ctx context.Context, options SyncOptions, client ManagedSyncClient, tables schema.Tables, resolvedResources chan<- *schema.Resource) { - tableConcurrency := max(uint64(options.Concurrency/minResourceConcurrency), minTableConcurrency) +func (s *Scheduler) syncRoundRobin(ctx context.Context, resolvedResources chan<- *schema.Resource) { + tableConcurrency := max(uint64(s.concurrency/minResourceConcurrency), minTableConcurrency) resourceConcurrency := tableConcurrency * minResourceConcurrency - p.tableSems = make([]*semaphore.Weighted, p.maxDepth) - for i := uint64(0); i < p.maxDepth; i++ { - p.tableSems[i] = semaphore.NewWeighted(int64(tableConcurrency)) + s.tableSems = make([]*semaphore.Weighted, s.maxDepth) + for i := uint64(0); i < s.maxDepth; i++ { + s.tableSems[i] = semaphore.NewWeighted(int64(tableConcurrency)) // reduce table concurrency logarithmically for every depth level tableConcurrency = max(tableConcurrency/2, minTableConcurrency) } - p.resourceSem = semaphore.NewWeighted(int64(resourceConcurrency)) + s.resourceSem = semaphore.NewWeighted(int64(resourceConcurrency)) // we have this because plugins can return sometimes clients in a random way which will cause // differences between this run and the next one. - preInitialisedClients := make([][]schema.ClientMeta, len(tables)) - for i, table := range tables { - clients := []schema.ClientMeta{client.(schema.ClientMeta)} + preInitialisedClients := make([][]schema.ClientMeta, len(s.tables)) + for i, table := range s.tables { + clients := []schema.ClientMeta{s.client} if table.Multiplex != nil { - clients = table.Multiplex(client.(schema.ClientMeta)) + clients = table.Multiplex(s.client) } preInitialisedClients[i] = clients // we do this here to avoid locks so we initial the metrics structure once in the main goroutines // and then we can just read from it in the other goroutines concurrently given we are not writing to it. - p.metrics.initWithClients(table, clients) + s.metrics.initWithClients(table, clients) } // We start a goroutine that logs the metrics periodically. // It needs its own waitgroup - var logWg sync.WaitGroup - logWg.Add(1) + // var logWg sync.WaitGroup + // logWg.Add(1) - logCtx, logCancel := context.WithCancel(ctx) - go p.periodicMetricLogger(logCtx, &logWg) + // logCtx, logCancel := context.WithCancel(ctx) + // go p.periodicMetricLogger(logCtx, &logWg) - tableClients := roundRobinInterleave(tables, preInitialisedClients) + tableClients := roundRobinInterleave(s.tables, preInitialisedClients) var wg sync.WaitGroup for _, tc := range tableClients { table := tc.table cl := tc.client - if err := p.tableSems[0].Acquire(ctx, 1); err != nil { + if err := s.tableSems[0].Acquire(ctx, 1); err != nil { // This means context was cancelled wg.Wait() // gracefully shut down the logger goroutine - logCancel() - logWg.Wait() + // logCancel() + // logWg.Wait() return } wg.Add(1) go func() { defer wg.Done() - defer p.tableSems[0].Release(1) + defer s.tableSems[0].Release(1) // not checking for error here as nothing much to do. // the error is logged and this happens when context is cancelled // Round Robin currently uses the DFS algorithm to resolve the tables, but this // may change in the future. - p.resolveTableDfs(ctx, table, cl, nil, resolvedResources, 1) + s.resolveTableDfs(ctx, table, cl, nil, resolvedResources, 1) }() } @@ -77,8 +77,8 @@ func (p *Plugin) syncRoundRobin(ctx context.Context, options SyncOptions, client wg.Wait() // gracefully shut down the logger goroutine - logCancel() - logWg.Wait() + // logCancel() + // logWg.Wait() } // interleave table-clients so that we get: diff --git a/plugin/scheduler_round_robin_test.go b/scheduler/scheduler_round_robin_test.go similarity index 94% rename from plugin/scheduler_round_robin_test.go rename to scheduler/scheduler_round_robin_test.go index 428b13c8a6..3b746b81bf 100644 --- a/plugin/scheduler_round_robin_test.go +++ b/scheduler/scheduler_round_robin_test.go @@ -1,4 +1,4 @@ -package plugin +package scheduler import ( "testing" @@ -6,6 +6,13 @@ import ( "github.com/cloudquery/plugin-sdk/v4/schema" ) +type testExecutionClient struct { +} + +func (t *testExecutionClient) ID() string { + return "test" +} + func TestRoundRobinInterleave(t *testing.T) { table1 := &schema.Table{Name: "test_table"} table2 := &schema.Table{Name: "test_table2"} From 101b5ac895ba93dfe034e4f20f6abe0bd372fd3e Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Mon, 5 Jun 2023 23:12:45 +0300 Subject: [PATCH 062/125] more wip --- helpers/internal_columns.go | 1 + {plugin => internal/memdb}/memdb.go | 83 ++-------- {plugin => internal/memdb}/memdb_test.go | 88 +++------- .../servers/destination/v0/destinations.go | 13 +- .../servers/destination/v1/destinations.go | 13 +- internal/servers/plugin/v3/plugin.go | 23 +-- plugin/docs.go | 6 +- plugin/metrics.go | 125 -------------- plugin/metrics_test.go | 37 ----- plugin/options.go | 73 +------- plugin/plugin.go | 156 ++++-------------- plugin/plugin_reader.go | 19 +-- plugin/plugin_test.go | 29 +--- plugin/plugin_writer.go | 39 +++-- scheduler/scheduler.go | 36 +--- scheduler/scheduler_dfs.go | 17 +- scheduler/scheduler_round_robin.go | 15 -- transformers/tables.go | 58 +++++++ plugin/managed_writer.go => writers/batch.go | 130 +++++++++++---- writers/batch_test.go | 56 +++++++ 20 files changed, 342 insertions(+), 675 deletions(-) create mode 100644 helpers/internal_columns.go rename {plugin => internal/memdb}/memdb.go (67%) rename {plugin => internal/memdb}/memdb_test.go (54%) delete mode 100644 plugin/metrics.go delete mode 100644 plugin/metrics_test.go create mode 100644 transformers/tables.go rename plugin/managed_writer.go => writers/batch.go (51%) create mode 100644 writers/batch_test.go diff --git a/helpers/internal_columns.go b/helpers/internal_columns.go new file mode 100644 index 0000000000..12668d607f --- /dev/null +++ b/helpers/internal_columns.go @@ -0,0 +1 @@ +package helpers \ No newline at end of file diff --git a/plugin/memdb.go b/internal/memdb/memdb.go similarity index 67% rename from plugin/memdb.go rename to internal/memdb/memdb.go index 8c23b430a3..13ad7f74b9 100644 --- a/plugin/memdb.go +++ b/internal/memdb/memdb.go @@ -1,4 +1,4 @@ -package plugin +package memdb import ( "context" @@ -8,6 +8,7 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/array" + "github.com/cloudquery/plugin-sdk/v4/plugin" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/rs/zerolog" ) @@ -35,7 +36,7 @@ func WithBlockingWrite() MemDBOption { } } -func GetNewClient(options ...MemDBOption) NewClientFunc { +func GetNewClient(options ...MemDBOption) plugin.NewClientFunc { c := &client{ memoryDB: make(map[string][]arrow.Record), memoryDBLock: sync.RWMutex{}, @@ -43,19 +44,19 @@ func GetNewClient(options ...MemDBOption) NewClientFunc { for _, opt := range options { opt(c) } - return func(context.Context, zerolog.Logger, any) (Client, error) { + return func(context.Context, zerolog.Logger, any) (plugin.Client, error) { return c, nil } } -func NewMemDBClient(_ context.Context, _ zerolog.Logger, spec any) (Client, error) { +func NewMemDBClient(_ context.Context, _ zerolog.Logger, spec any) (plugin.Client, error) { return &client{ memoryDB: make(map[string][]arrow.Record), tables: make(map[string]*schema.Table), }, nil } -func NewMemDBClientErrOnNew(context.Context, zerolog.Logger, []byte) (Client, error) { +func NewMemDBClientErrOnNew(context.Context, zerolog.Logger, []byte) (plugin.Client, error) { return nil, fmt.Errorf("newTestDestinationMemDBClientErrOnNew") } @@ -84,11 +85,7 @@ func (c *client) ID() string { return "testDestinationMemDB" } -func (c *client) NewManagedSyncClient(context.Context, SyncOptions) (ManagedSyncClient, error) { - return nil, fmt.Errorf("not supported") -} - -func (c *client) Sync(ctx context.Context, options SyncOptions, res chan<- arrow.Record) error { +func (c *client) Sync(ctx context.Context, options plugin.SyncOptions, res chan<- arrow.Record) error { c.memoryDBLock.RLock() for tableName := range c.memoryDB { for _, row := range c.memoryDB[tableName] { @@ -99,7 +96,15 @@ func (c *client) Sync(ctx context.Context, options SyncOptions, res chan<- arrow return nil } -func (c *client) Migrate(_ context.Context, tables schema.Tables, migrateMode MigrateMode) error { +func (c *client) Tables(ctx context.Context) (schema.Tables, error) { + tables := make(schema.Tables, 0, len(c.tables)) + for _, table := range c.tables { + tables = append(tables, table) + } + return tables, nil +} + +func (c *client) Migrate(_ context.Context, tables schema.Tables, options plugin.MigrateOptions) error { for _, table := range tables { tableName := table.Name memTable := c.memoryDB[tableName] @@ -120,32 +125,7 @@ func (c *client) Migrate(_ context.Context, tables schema.Tables, migrateMode Mi return nil } -func (c *client) Read(_ context.Context, table *schema.Table, source string, res chan<- arrow.Record) error { - tableName := table.Name - if c.memoryDB[tableName] == nil { - return nil - } - sourceColIndex := table.Columns.Index(schema.CqSourceNameColumn.Name) - if sourceColIndex == -1 { - return fmt.Errorf("table %s doesn't have source column", tableName) - } - var sortedRes []arrow.Record - c.memoryDBLock.RLock() - for _, row := range c.memoryDB[tableName] { - arr := row.Column(sourceColIndex) - if arr.(*array.String).Value(0) == source { - sortedRes = append(sortedRes, row) - } - } - c.memoryDBLock.RUnlock() - - for _, row := range sortedRes { - res <- row - } - return nil -} - -func (c *client) Write(ctx context.Context, _ schema.Tables, writeMode WriteMode, resources <-chan arrow.Record) error { +func (c *client) Write(ctx context.Context, options plugin.WriteOptions, resources <-chan arrow.Record) error { if c.errOnWrite { return fmt.Errorf("errOnWrite") } @@ -165,7 +145,7 @@ func (c *client) Write(ctx context.Context, _ schema.Tables, writeMode WriteMode return fmt.Errorf("table name not found in schema metadata") } table := c.tables[tableName] - if writeMode == WriteModeAppend { + if options.WriteMode == plugin.WriteModeAppend { c.memoryDB[tableName] = append(c.memoryDB[tableName], resource) } else { c.overwrite(table, resource) @@ -175,33 +155,6 @@ func (c *client) Write(ctx context.Context, _ schema.Tables, writeMode WriteMode return nil } -func (c *client) WriteTableBatch(ctx context.Context, table *schema.Table, writeMode WriteMode, resources []arrow.Record) error { - if c.errOnWrite { - return fmt.Errorf("errOnWrite") - } - if c.blockingWrite { - <-ctx.Done() - if c.errOnWrite { - return fmt.Errorf("errOnWrite") - } - return nil - } - tableName := table.Name - for _, resource := range resources { - c.memoryDBLock.Lock() - if writeMode == WriteModeAppend { - c.memoryDB[tableName] = append(c.memoryDB[tableName], resource) - } else { - c.overwrite(table, resource) - } - c.memoryDBLock.Unlock() - } - return nil -} - -func (*client) Metrics() Metrics { - return Metrics{} -} func (c *client) Close(context.Context) error { c.memoryDB = nil diff --git a/plugin/memdb_test.go b/internal/memdb/memdb_test.go similarity index 54% rename from plugin/memdb_test.go rename to internal/memdb/memdb_test.go index 2f9f54a506..e04a23bd1a 100644 --- a/plugin/memdb_test.go +++ b/internal/memdb/memdb_test.go @@ -1,4 +1,4 @@ -package plugin +package memdb import ( "context" @@ -7,90 +7,52 @@ import ( "github.com/apache/arrow/go/v13/arrow" pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" + "github.com/cloudquery/plugin-sdk/v4/plugin" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/google/uuid" ) -var migrateStrategyOverwrite = MigrateStrategy{ - AddColumn: MigrateModeForce, - AddColumnNotNull: MigrateModeForce, - RemoveColumn: MigrateModeForce, - RemoveColumnNotNull: MigrateModeForce, - ChangeColumn: MigrateModeForce, +var migrateStrategyOverwrite = plugin.MigrateStrategy{ + AddColumn: plugin.MigrateModeForce, + AddColumnNotNull: plugin.MigrateModeForce, + RemoveColumn: plugin.MigrateModeForce, + RemoveColumnNotNull: plugin.MigrateModeForce, + ChangeColumn: plugin.MigrateModeForce, } -var migrateStrategyAppend = MigrateStrategy{ - AddColumn: MigrateModeForce, - AddColumnNotNull: MigrateModeForce, - RemoveColumn: MigrateModeForce, - RemoveColumnNotNull: MigrateModeForce, - ChangeColumn: MigrateModeForce, +var migrateStrategyAppend = plugin.MigrateStrategy{ + AddColumn: plugin.MigrateModeForce, + AddColumnNotNull: plugin.MigrateModeForce, + RemoveColumn: plugin.MigrateModeForce, + RemoveColumnNotNull: plugin.MigrateModeForce, + ChangeColumn: plugin.MigrateModeForce, } func TestPluginUnmanagedClient(t *testing.T) { - PluginTestSuiteRunner( + plugin.PluginTestSuiteRunner( t, - func() *Plugin { - return NewPlugin("test", "development", NewMemDBClient) + func() *plugin.Plugin { + return plugin.NewPlugin("test", "development", NewMemDBClient) }, nil, - PluginTestSuiteTests{ + plugin.PluginTestSuiteTests{ MigrateStrategyOverwrite: migrateStrategyOverwrite, MigrateStrategyAppend: migrateStrategyAppend, }, ) } -func TestPluginManagedClient(t *testing.T) { - PluginTestSuiteRunner(t, - func() *Plugin { - return NewPlugin("test", "development", NewMemDBClient, WithManagedWriter()) - }, - nil, - PluginTestSuiteTests{ - MigrateStrategyOverwrite: migrateStrategyOverwrite, - MigrateStrategyAppend: migrateStrategyAppend, - }) -} - -func TestPluginManagedClientWithSmallBatchSize(t *testing.T) { - PluginTestSuiteRunner(t, - func() *Plugin { - return NewPlugin("test", "development", NewMemDBClient, WithManagedWriter(), - WithDefaultBatchSize(1), - WithDefaultBatchSizeBytes(1)) - }, nil, - PluginTestSuiteTests{ - MigrateStrategyOverwrite: migrateStrategyOverwrite, - MigrateStrategyAppend: migrateStrategyAppend, - }) -} - -func TestPluginManagedClientWithLargeBatchSize(t *testing.T) { - PluginTestSuiteRunner(t, - func() *Plugin { - return NewPlugin("test", "development", NewMemDBClient, WithManagedWriter(), - WithDefaultBatchSize(100000000), - WithDefaultBatchSizeBytes(100000000)) - }, - nil, - PluginTestSuiteTests{ - MigrateStrategyOverwrite: migrateStrategyOverwrite, - MigrateStrategyAppend: migrateStrategyAppend, - }) -} - func TestPluginManagedClientWithCQPKs(t *testing.T) { - PluginTestSuiteRunner(t, - func() *Plugin { - return NewPlugin("test", "development", NewMemDBClient) + plugin.PluginTestSuiteRunner(t, + func() *plugin.Plugin { + return plugin.NewPlugin("test", "development", NewMemDBClient) }, pbPlugin.Spec{ WriteSpec: &pbPlugin.WriteSpec{ PkMode: pbPlugin.WriteSpec_CQ_ID_ONLY, }, }, - PluginTestSuiteTests{ + plugin.PluginTestSuiteTests{ MigrateStrategyOverwrite: migrateStrategyOverwrite, MigrateStrategyAppend: migrateStrategyAppend, }) @@ -98,7 +60,7 @@ func TestPluginManagedClientWithCQPKs(t *testing.T) { func TestPluginOnNewError(t *testing.T) { ctx := context.Background() - p := NewPlugin("test", "development", NewMemDBClientErrOnNew) + p := plugin.NewPlugin("test", "development", NewMemDBClientErrOnNew) err := p.Init(ctx, nil) if err == nil { @@ -109,7 +71,7 @@ func TestPluginOnNewError(t *testing.T) { func TestOnWriteError(t *testing.T) { ctx := context.Background() newClientFunc := GetNewClient(WithErrOnWrite()) - p := NewPlugin("test", "development", newClientFunc) + p := plugin.NewPlugin("test", "development", newClientFunc) if err := p.Init(ctx, nil); err != nil { t.Fatal(err) } @@ -144,7 +106,7 @@ func TestOnWriteError(t *testing.T) { func TestOnWriteCtxCancelled(t *testing.T) { ctx := context.Background() newClientFunc := GetNewClient(WithBlockingWrite()) - p := NewPlugin("test", "development", newClientFunc) + p := plugin.NewPlugin("test", "development", newClientFunc) if err := p.Init(ctx, pbPlugin.Spec{ WriteSpec: &pbPlugin.WriteSpec{}, }); err != nil { diff --git a/internal/servers/destination/v0/destinations.go b/internal/servers/destination/v0/destinations.go index 7419cf10f2..4c22750e69 100644 --- a/internal/servers/destination/v0/destinations.go +++ b/internal/servers/destination/v0/destinations.go @@ -3,7 +3,6 @@ package destination import ( "context" "encoding/json" - "fmt" "io" "github.com/apache/arrow/go/v13/arrow" @@ -212,13 +211,13 @@ func SetDestinationManagedCqColumns(tables []*schema.Table) { } func (s *Server) GetMetrics(context.Context, *pb.GetDestinationMetrics_Request) (*pb.GetDestinationMetrics_Response, error) { - stats := s.Plugin.Metrics() - b, err := json.Marshal(stats) - if err != nil { - return nil, fmt.Errorf("failed to marshal stats: %w", err) - } + // stats := s.Plugin.Metrics() + // b, err := json.Marshal(stats) + // if err != nil { + // return nil, fmt.Errorf("failed to marshal stats: %w", err) + // } return &pb.GetDestinationMetrics_Response{ - Metrics: b, + // Metrics: b, }, nil } diff --git a/internal/servers/destination/v1/destinations.go b/internal/servers/destination/v1/destinations.go index d53e5e2ee9..45cfa4f7f7 100644 --- a/internal/servers/destination/v1/destinations.go +++ b/internal/servers/destination/v1/destinations.go @@ -4,7 +4,6 @@ import ( "bytes" "context" "encoding/json" - "fmt" "io" "github.com/apache/arrow/go/v13/arrow" @@ -171,13 +170,13 @@ func setCQIDAsPrimaryKeysForTables(tables schema.Tables) { } func (s *Server) GetMetrics(context.Context, *pb.GetDestinationMetrics_Request) (*pb.GetDestinationMetrics_Response, error) { - stats := s.Plugin.Metrics() - b, err := json.Marshal(stats) - if err != nil { - return nil, fmt.Errorf("failed to marshal stats: %w", err) - } + // stats := s.Plugin.Metrics() + // b, err := json.Marshal(stats) + // if err != nil { + // return nil, fmt.Errorf("failed to marshal stats: %w", err) + // } return &pb.GetDestinationMetrics_Response{ - Metrics: b, + // Metrics: b, }, nil } diff --git a/internal/servers/plugin/v3/plugin.go b/internal/servers/plugin/v3/plugin.go index fa432af917..00e2543d99 100644 --- a/internal/servers/plugin/v3/plugin.go +++ b/internal/servers/plugin/v3/plugin.go @@ -33,30 +33,17 @@ type Server struct { NoSentry bool } -func (s *Server) GetStaticTables(context.Context, *pb.GetStaticTables_Request) (*pb.GetStaticTables_Response, error) { - tables := s.Plugin.StaticTables().ToArrowSchemas() +func (s *Server) GetTables(context.Context, *pb.GetTables_Request) (*pb.GetTables_Response, error) { + tables := s.Plugin.Tables().ToArrowSchemas() encoded, err := tables.Encode() if err != nil { return nil, fmt.Errorf("failed to encode tables: %w", err) } - return &pb.GetStaticTables_Response{ + return &pb.GetTables_Response{ Tables: encoded, }, nil } -func (s *Server) GetDynamicTables(context.Context, *pb.GetDynamicTables_Request) (*pb.GetDynamicTables_Response, error) { - tables := s.Plugin.DynamicTables() - if tables == nil { - return &pb.GetDynamicTables_Response{}, nil - } - encoded, err := tables.ToArrowSchemas().Encode() - if err != nil { - return nil, fmt.Errorf("failed to encode tables: %w", err) - } - return &pb.GetDynamicTables_Response{ - Tables: encoded, - }, nil -} func (s *Server) GetName(context.Context, *pb.GetName_Request) (*pb.GetName_Response, error) { return &pb.GetName_Response{ @@ -86,10 +73,6 @@ func (s *Server) Sync(req *pb.Sync_Request, stream pb.Plugin_SyncServer) error { Tables: req.Tables, SkipTables: req.SkipTables, Concurrency: req.Concurrency, - Scheduler: plugin.SchedulerDFS, - } - if req.Scheduler == pb.SCHEDULER_SCHEDULER_ROUND_ROBIN { - syncOptions.Scheduler = plugin.SchedulerRoundRobin } // sourceName := req.SourceName diff --git a/plugin/docs.go b/plugin/docs.go index b100ea649c..6e4dccf581 100644 --- a/plugin/docs.go +++ b/plugin/docs.go @@ -2,6 +2,7 @@ package plugin import ( "bytes" + "context" "embed" "encoding/json" "fmt" @@ -83,7 +84,10 @@ func (p *Plugin) GeneratePluginDocs(dir string, format pbPlugin.GenDocs_FORMAT) if err := os.MkdirAll(dir, os.ModePerm); err != nil { return err } - tables := p.staticTables + tables, err := p.Tables(context.Background()) + if err != nil { + return err + } setDestinationManagedCqColumns(tables) sortedTables := make(schema.Tables, 0, len(tables)) diff --git a/plugin/metrics.go b/plugin/metrics.go deleted file mode 100644 index 8ba88823b9..0000000000 --- a/plugin/metrics.go +++ /dev/null @@ -1,125 +0,0 @@ -package plugin - -import ( - "sync/atomic" - "time" - - "github.com/cloudquery/plugin-sdk/v4/schema" -) - -type Metrics struct { - TableClient map[string]map[string]*TableClientMetrics -} - -type TableClientMetrics struct { - Resources uint64 - Errors uint64 - Panics uint64 - StartTime time.Time - EndTime time.Time -} - -func (s *TableClientMetrics) Equal(other *TableClientMetrics) bool { - return s.Resources == other.Resources && s.Errors == other.Errors && s.Panics == other.Panics -} - -// Equal compares to stats. Mostly useful in testing -func (s *Metrics) Equal(other *Metrics) bool { - for table, clientStats := range s.TableClient { - for client, stats := range clientStats { - if _, ok := other.TableClient[table]; !ok { - return false - } - if _, ok := other.TableClient[table][client]; !ok { - return false - } - if !stats.Equal(other.TableClient[table][client]) { - return false - } - } - } - for table, clientStats := range other.TableClient { - for client, stats := range clientStats { - if _, ok := s.TableClient[table]; !ok { - return false - } - if _, ok := s.TableClient[table][client]; !ok { - return false - } - if !stats.Equal(s.TableClient[table][client]) { - return false - } - } - } - return true -} - -func (s *Metrics) initWithClients(table *schema.Table, clients []schema.ClientMeta) { - s.TableClient[table.Name] = make(map[string]*TableClientMetrics, len(clients)) - for _, client := range clients { - s.TableClient[table.Name][client.ID()] = &TableClientMetrics{} - } - for _, relation := range table.Relations { - s.initWithClients(relation, clients) - } -} - -func (s *Metrics) TotalErrors() uint64 { - var total uint64 - for _, clientMetrics := range s.TableClient { - for _, metrics := range clientMetrics { - total += metrics.Errors - } - } - return total -} - -func (s *Metrics) TotalErrorsAtomic() uint64 { - var total uint64 - for _, clientMetrics := range s.TableClient { - for _, metrics := range clientMetrics { - total += atomic.LoadUint64(&metrics.Errors) - } - } - return total -} - -func (s *Metrics) TotalPanics() uint64 { - var total uint64 - for _, clientMetrics := range s.TableClient { - for _, metrics := range clientMetrics { - total += metrics.Panics - } - } - return total -} - -func (s *Metrics) TotalPanicsAtomic() uint64 { - var total uint64 - for _, clientMetrics := range s.TableClient { - for _, metrics := range clientMetrics { - total += atomic.LoadUint64(&metrics.Panics) - } - } - return total -} - -func (s *Metrics) TotalResources() uint64 { - var total uint64 - for _, clientMetrics := range s.TableClient { - for _, metrics := range clientMetrics { - total += metrics.Resources - } - } - return total -} - -func (s *Metrics) TotalResourcesAtomic() uint64 { - var total uint64 - for _, clientMetrics := range s.TableClient { - for _, metrics := range clientMetrics { - total += atomic.LoadUint64(&metrics.Resources) - } - } - return total -} diff --git a/plugin/metrics_test.go b/plugin/metrics_test.go deleted file mode 100644 index a566edee5d..0000000000 --- a/plugin/metrics_test.go +++ /dev/null @@ -1,37 +0,0 @@ -package plugin - -import "testing" - -func TestMetrics(t *testing.T) { - s := &Metrics{ - TableClient: make(map[string]map[string]*TableClientMetrics), - } - s.TableClient["test_table"] = make(map[string]*TableClientMetrics) - s.TableClient["test_table"]["testExecutionClient"] = &TableClientMetrics{ - Resources: 1, - Errors: 2, - Panics: 3, - } - if s.TotalResources() != 1 { - t.Fatal("expected 1 resource") - } - if s.TotalErrors() != 2 { - t.Fatal("expected 2 error") - } - if s.TotalPanics() != 3 { - t.Fatal("expected 3 panics") - } - - other := &Metrics{ - TableClient: make(map[string]map[string]*TableClientMetrics), - } - other.TableClient["test_table"] = make(map[string]*TableClientMetrics) - other.TableClient["test_table"]["testExecutionClient"] = &TableClientMetrics{ - Resources: 1, - Errors: 2, - Panics: 3, - } - if !s.Equal(other) { - t.Fatal("expected metrics to be equal") - } -} diff --git a/plugin/options.go b/plugin/options.go index 66a13b69f7..aaa8687a51 100644 --- a/plugin/options.go +++ b/plugin/options.go @@ -1,10 +1,7 @@ package plugin import ( - "bytes" - "context" "fmt" - "time" "github.com/cloudquery/plugin-sdk/v4/schema" ) @@ -65,46 +62,8 @@ func (m WriteMode) String() string { return writeModeStrings[m] } -type Scheduler int - -const ( - SchedulerDFS Scheduler = iota - SchedulerRoundRobin -) - -var AllSchedulers = Schedulers{SchedulerDFS, SchedulerRoundRobin} -var AllSchedulerNames = [...]string{ - SchedulerDFS: "dfs", - SchedulerRoundRobin: "round-robin", -} - -type Schedulers []Scheduler - -func (s Schedulers) String() string { - var buffer bytes.Buffer - for i, scheduler := range s { - if i > 0 { - buffer.WriteString(", ") - } - buffer.WriteString(scheduler.String()) - } - return buffer.String() -} - -func (s Scheduler) String() string { - return AllSchedulerNames[s] -} - -type GetTables func(ctx context.Context, c Client) (schema.Tables, error) - type Option func(*Plugin) -// WithDynamicTable allows the plugin to return list of tables after call to New -func WithDynamicTable(getDynamicTables GetTables) Option { - return func(p *Plugin) { - p.getDynamicTables = getDynamicTables - } -} // WithNoInternalColumns won't add internal columns (_cq_id, _cq_parent_cq_id) to the plugin tables func WithNoInternalColumns() Option { @@ -119,34 +78,4 @@ func WithTitleTransformer(t func(*schema.Table) string) Option { return func(p *Plugin) { p.titleTransformer = t } -} - -func WithStaticTables(tables schema.Tables) Option { - return func(p *Plugin) { - p.staticTables = tables - } -} - -func WithManagedWriter() Option { - return func(p *Plugin) { - p.managedWriter = true - } -} - -func WithBatchTimeout(seconds int) Option { - return func(p *Plugin) { - p.batchTimeout = time.Duration(seconds) * time.Second - } -} - -func WithDefaultBatchSize(defaultBatchSize int) Option { - return func(p *Plugin) { - p.defaultBatchSize = defaultBatchSize - } -} - -func WithDefaultBatchSizeBytes(defaultBatchSizeBytes int) Option { - return func(p *Plugin) { - p.defaultBatchSizeBytes = defaultBatchSizeBytes - } -} +} \ No newline at end of file diff --git a/plugin/plugin.go b/plugin/plugin.go index 44a8731fec..452d650196 100644 --- a/plugin/plugin.go +++ b/plugin/plugin.go @@ -13,23 +13,13 @@ import ( "golang.org/x/sync/semaphore" ) -const ( - defaultBatchTimeoutSeconds = 20 - defaultBatchSize = 10000 - defaultBatchSizeBytes = 5 * 1024 * 1024 // 5 MiB -) - type NewClientFunc func(context.Context, zerolog.Logger, any) (Client, error) -type ManagedSyncClient interface { - ID() string -} - type Client interface { + Tables(ctx context.Context) (schema.Tables, error) Sync(ctx context.Context, options SyncOptions, res chan<- arrow.Record) error - Migrate(ctx context.Context, tables schema.Tables, migrateMode MigrateMode) error - WriteTableBatch(ctx context.Context, table *schema.Table, writeMode WriteMode, data []arrow.Record) error - Write(ctx context.Context, tables schema.Tables, writeMode WriteMode, res <-chan arrow.Record) error + Migrate(ctx context.Context, tables schema.Tables, migrateMode MigrateOptions) error + Write(ctx context.Context, options WriteOptions, res <-chan arrow.Record) error DeleteStale(ctx context.Context, tables schema.Tables, sourceName string, syncTime time.Time) error Close(ctx context.Context) error } @@ -44,10 +34,6 @@ func (UnimplementedWriter) Write(ctx context.Context, tables schema.Tables, writ return fmt.Errorf("not implemented") } -func (UnimplementedWriter) WriteTableBatch(ctx context.Context, table *schema.Table, writeMode WriteMode, data []arrow.Record) error { - return fmt.Errorf("not implemented") -} - func (UnimplementedWriter) DeleteStale(ctx context.Context, tables schema.Tables, sourceName string, syncTime time.Time) error { return fmt.Errorf("not implemented") } @@ -58,10 +44,8 @@ func (UnimplementedSync) Sync(ctx context.Context, options SyncOptions, res chan return fmt.Errorf("not implemented") } -type UnimplementedRead struct{} - -func (UnimplementedRead) Read(ctx context.Context, table *schema.Table, sourceName string, res chan<- arrow.Record) error { - return fmt.Errorf("not implemented") +func (UnimplementedSync) Tables(ctx context.Context) (schema.Tables, error) { + return nil, fmt.Errorf("not implemented") } // Plugin is the base structure required to pass to sdk.serve @@ -73,12 +57,6 @@ type Plugin struct { version string // Called upon init call to validate and init configuration newClient NewClientFunc - // dynamic table function if specified - getDynamicTables GetTables - // Tables are static tables that defined in compile time by the plugin - staticTables schema.Tables - // status sync metrics - metrics *Metrics // Logger to call, this logger is passed to the serve.Serve Client, if not defined Serve will create one instead. logger zerolog.Logger // resourceSem is a semaphore that limits the number of concurrent resources being fetched @@ -91,87 +69,24 @@ type Plugin struct { caser *caser.Caser // mu is a mutex that limits the number of concurrent init/syncs (can only be one at a time) mu sync.Mutex - // client is the initialized session client client Client - // sessionTables are the - sessionTables schema.Tables // spec is the spec the client was initialized with spec any // NoInternalColumns if set to true will not add internal columns to tables such as _cq_id and _cq_parent_id // useful for sources such as PostgreSQL and other databases internalColumns bool - // unmanagedSync if set to true then the plugin will call Sync directly and not use the scheduler - unmanagedSync bool // titleTransformer allows the plugin to control how table names get turned into titles for generated documentation titleTransformer func(*schema.Table) string syncTime time.Time sourceName string deterministicCQId bool - - managedWriter bool - workers map[string]*worker - workersLock *sync.Mutex - - batchTimeout time.Duration - defaultBatchSize int - defaultBatchSizeBytes int } const ( maxAllowedDepth = 4 ) -// Add internal columns -func (p *Plugin) addInternalColumns(tables []*schema.Table) error { - for _, table := range tables { - if c := table.Column("_cq_id"); c != nil { - return fmt.Errorf("table %s already has column _cq_id", table.Name) - } - cqID := schema.CqIDColumn - if len(table.PrimaryKeys()) == 0 { - cqID.PrimaryKey = true - } - cqSourceName := schema.CqSourceNameColumn - cqSyncTime := schema.CqSyncTimeColumn - cqSourceName.Resolver = func(_ context.Context, _ schema.ClientMeta, resource *schema.Resource, c schema.Column) error { - return resource.Set(c.Name, p.sourceName) - } - cqSyncTime.Resolver = func(_ context.Context, _ schema.ClientMeta, resource *schema.Resource, c schema.Column) error { - return resource.Set(c.Name, p.syncTime) - } - - table.Columns = append([]schema.Column{cqSourceName, cqSyncTime, cqID, schema.CqParentIDColumn}, table.Columns...) - if err := p.addInternalColumns(table.Relations); err != nil { - return err - } - } - return nil -} - -// Set parent links on relational tables -func setParents(tables schema.Tables, parent *schema.Table) { - for _, table := range tables { - table.Parent = parent - setParents(table.Relations, table) - } -} - -// Apply transformations to tables -func transformTables(tables schema.Tables) error { - for _, table := range tables { - if table.Transform != nil { - if err := table.Transform(table); err != nil { - return fmt.Errorf("failed to transform table %s: %w", table.Name, err) - } - } - if err := transformTables(table.Relations); err != nil { - return err - } - } - return nil -} - func maxDepth(tables schema.Tables) uint64 { var depth uint64 if len(tables) == 0 { @@ -190,41 +105,16 @@ func maxDepth(tables schema.Tables) uint64 { // Depending on the options, it can be write only plugin, read only plugin or both. func NewPlugin(name string, version string, newClient NewClientFunc, options ...Option) *Plugin { p := Plugin{ - name: name, - version: version, - internalColumns: true, - caser: caser.New(), - titleTransformer: DefaultTitleTransformer, - newClient: newClient, - metrics: &Metrics{TableClient: make(map[string]map[string]*TableClientMetrics)}, - workers: make(map[string]*worker), - workersLock: &sync.Mutex{}, - batchTimeout: time.Duration(defaultBatchTimeoutSeconds) * time.Second, - defaultBatchSize: defaultBatchSize, - defaultBatchSizeBytes: defaultBatchSizeBytes, + name: name, + version: version, + internalColumns: true, + caser: caser.New(), + titleTransformer: DefaultTitleTransformer, + newClient: newClient, } for _, opt := range options { opt(&p) } - if p.staticTables != nil { - setParents(p.staticTables, nil) - if err := transformTables(p.staticTables); err != nil { - panic(err) - } - if p.internalColumns { - if err := p.addInternalColumns(p.staticTables); err != nil { - panic(err) - } - } - p.maxDepth = maxDepth(p.staticTables) - if p.maxDepth > maxAllowedDepth { - panic(fmt.Errorf("max depth of tables is %d, max allowed is %d", p.maxDepth, maxAllowedDepth)) - } - if err := p.validate(p.staticTables); err != nil { - panic(err) - } - } - return &p } @@ -242,8 +132,28 @@ func (p *Plugin) SetLogger(logger zerolog.Logger) { p.logger = logger.With().Str("module", p.name+"-src").Logger() } -func (p *Plugin) Metrics() *Metrics { - return p.metrics +func (p *Plugin) Tables(ctx context.Context) (schema.Tables, error) { + tables, err := p.client.Tables(ctx) + if err != nil { + return nil, fmt.Errorf("failed to get tables: %w", err) + } + setParents(tables, nil) + if err := transformTables(tables); err != nil { + return nil, err + } + if p.internalColumns { + if err := p.addInternalColumns(tables); err != nil { + return nil, err + } + } + p.maxDepth = maxDepth(tables) + if p.maxDepth > maxAllowedDepth { + return nil, fmt.Errorf("max depth of tables is %d, max allowed is %d", p.maxDepth, maxAllowedDepth) + } + if err := p.validate(tables); err != nil { + return nil, err + } + return tables, nil } // Init initializes the plugin with the given spec. diff --git a/plugin/plugin_reader.go b/plugin/plugin_reader.go index e040976da0..da76a3ed97 100644 --- a/plugin/plugin_reader.go +++ b/plugin/plugin_reader.go @@ -6,7 +6,6 @@ import ( "time" "github.com/apache/arrow/go/v13/arrow" - "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/cloudquery/plugin-sdk/v4/state" "github.com/rs/zerolog" ) @@ -15,7 +14,6 @@ type SyncOptions struct { Tables []string SkipTables []string Concurrency int64 - Scheduler Scheduler DeterministicCQID bool // SyncTime if specified then this will be add to every table as _sync_time column SyncTime time.Time @@ -26,7 +24,6 @@ type SyncOptions struct { type ReadOnlyClient interface { Sync(ctx context.Context, options SyncOptions, res chan<- arrow.Record) error - Read(ctx context.Context, table *schema.Table, sourceName string, res chan<- arrow.Record) error Close(ctx context.Context) error } @@ -51,18 +48,6 @@ func NewReadOnlyPlugin(name string, version string, newClient NewReadOnlyClientF return NewPlugin(name, version, newClientWrapper, options...) } -// Tables returns all tables supported by this source plugin -func (p *Plugin) StaticTables() schema.Tables { - return p.staticTables -} - -func (p *Plugin) HasDynamicTables() bool { - return p.getDynamicTables != nil -} - -func (p *Plugin) DynamicTables() schema.Tables { - return p.sessionTables -} func (p *Plugin) syncAll(ctx context.Context, options SyncOptions) ([]arrow.Record, error) { var err error @@ -86,12 +71,12 @@ func (p *Plugin) Sync(ctx context.Context, options SyncOptions, res chan<- arrow } defer p.mu.Unlock() p.syncTime = options.SyncTime - startTime := time.Now() + // startTime := time.Now() if err := p.client.Sync(ctx, options, res); err != nil { return fmt.Errorf("failed to sync unmanaged client: %w", err) } - p.logger.Info().Uint64("resources", p.metrics.TotalResources()).Uint64("errors", p.metrics.TotalErrors()).Uint64("panics", p.metrics.TotalPanics()).TimeDiff("duration", time.Now(), startTime).Msg("sync finished") + // p.logger.Info().Uint64("resources", p.metrics.TotalResources()).Uint64("errors", p.metrics.TotalErrors()).Uint64("panics", p.metrics.TotalPanics()).TimeDiff("duration", time.Now(), startTime).Msg("sync finished") return nil } diff --git a/plugin/plugin_test.go b/plugin/plugin_test.go index 6fe3d0aa7e..d0016a113b 100644 --- a/plugin/plugin_test.go +++ b/plugin/plugin_test.go @@ -11,7 +11,7 @@ import ( func TestPluginUnmanagedSync(t *testing.T) { ctx := context.Background() - p := NewPlugin("test", "v0.0.0", NewMemDBClient, WithUnmanagedSync()) + p := NewPlugin("test", "v0.0.0", NewMemDBClient) testTable := schema.TestTable("test_table", schema.TestSourceOptions{}) syncTime := time.Now().UTC() sourceName := "test" @@ -74,30 +74,3 @@ func TestPluginUnmanagedSync(t *testing.T) { t.Fatal(err) } } - -// func TestPluginInit(t *testing.T) { -// const ( -// batchSize = uint64(100) -// batchSizeBytes = uint64(1000) -// ) - -// var ( -// batchSizeObserved uint64 -// batchSizeBytesObserved uint64 -// ) -// p := NewPlugin( -// "test", -// "development", -// func(ctx context.Context, logger zerolog.Logger, s any) (Client, error) { -// batchSizeObserved = s.WriteSpec.BatchSize -// batchSizeBytesObserved = s.WriteSpec.BatchSizeBytes -// return NewMemDBClient(ctx, logger, s) -// }, -// WithDefaultBatchSize(int(batchSize)), -// WithDefaultBatchSizeBytes(int(batchSizeBytes)), -// ) -// require.NoError(t, p.Init(context.TODO(), nil)) - -// require.Equal(t, batchSize, batchSizeObserved) -// require.Equal(t, batchSizeBytes, batchSizeBytesObserved) -// } diff --git a/plugin/plugin_writer.go b/plugin/plugin_writer.go index af37b8df48..d0420182be 100644 --- a/plugin/plugin_writer.go +++ b/plugin/plugin_writer.go @@ -9,21 +9,33 @@ import ( "github.com/cloudquery/plugin-sdk/v4/schema" ) -func (p *Plugin) Migrate(ctx context.Context, tables schema.Tables, migrateMode MigrateMode) error { +type WriteOptions struct { + // WriteMode is the mode to write to the database + WriteMode WriteMode + // Predefined tables are available if tables are known at the start of the write + Tables schema.Tables +} + +type MigrateOptions struct { + // MigrateMode is the mode to migrate the database + MigrateMode MigrateMode +} + +func (p *Plugin) Migrate(ctx context.Context, tables schema.Tables, options MigrateOptions) error { if p.client == nil { return fmt.Errorf("plugin is not initialized") } - return p.client.Migrate(ctx, tables, migrateMode) + return p.client.Migrate(ctx, tables, options) } // this function is currently used mostly for testing so it's not a public api -func (p *Plugin) writeOne(ctx context.Context, sourceName string, syncTime time.Time, writeMode WriteMode, resource arrow.Record) error { +func (p *Plugin) writeOne(ctx context.Context, options WriteOptions, resource arrow.Record) error { resources := []arrow.Record{resource} - return p.writeAll(ctx, sourceName, syncTime, writeMode, resources) + return p.writeAll(ctx, options, resources) } // this function is currently used mostly for testing so it's not a public api -func (p *Plugin) writeAll(ctx context.Context, sourceName string, syncTime time.Time, writeMode WriteMode, resources []arrow.Record) error { +func (p *Plugin) writeAll(ctx context.Context, options WriteOptions, resources []arrow.Record) error { ch := make(chan arrow.Record, len(resources)) for _, resource := range resources { ch <- resource @@ -48,21 +60,14 @@ func (p *Plugin) writeAll(ctx context.Context, sourceName string, syncTime time. tables = append(tables, table) tableNames[table.Name] = struct{}{} } - return p.Write(ctx, sourceName, tables, syncTime, writeMode, ch) + options.Tables = tables + return p.Write(ctx, options, ch) } -func (p *Plugin) Write(ctx context.Context, sourceName string, tables schema.Tables, syncTime time.Time, writeMode WriteMode, res <-chan arrow.Record) error { - syncTime = syncTime.UTC() - if p.managedWriter { - if err := p.writeManagedTableBatch(ctx, tables, writeMode, res); err != nil { - return err - } - } else { - if err := p.client.Write(ctx, tables, writeMode, res); err != nil { - return err - } +func (p *Plugin) Write(ctx context.Context, options WriteOptions, res <-chan arrow.Record) error { + if err := p.client.Write(ctx, options, res); err != nil { + return err } - return nil } diff --git a/scheduler/scheduler.go b/scheduler/scheduler.go index 228582ef7a..d9c2654634 100644 --- a/scheduler/scheduler.go +++ b/scheduler/scheduler.go @@ -134,14 +134,14 @@ func (s *Scheduler) Sync(ctx context.Context, res chan<- arrow.Record) error { return nil } -// func (p *Scheduler) logTablesMetrics(tables schema.Tables, client schema.ClientMeta) { -// clientName := client.ID() -// for _, table := range tables { -// metrics := p.metrics.TableClient[table.Name][clientName] -// p.logger.Info().Str("table", table.Name).Str("client", clientName).Uint64("resources", metrics.Resources).Uint64("errors", metrics.Errors).Msg("table sync finished") -// p.logTablesMetrics(table.Relations, client) -// } -// } +func (p *Scheduler) logTablesMetrics(tables schema.Tables, client schema.ClientMeta) { + clientName := client.ID() + for _, table := range tables { + metrics := p.metrics.TableClient[table.Name][clientName] + p.logger.Info().Str("table", table.Name).Str("client", clientName).Uint64("resources", metrics.Resources).Uint64("errors", metrics.Errors).Msg("table sync finished") + p.logTablesMetrics(table.Relations, client) + } +} func (p *Scheduler) resolveResource(ctx context.Context, table *schema.Table, client schema.ClientMeta, parent *schema.Resource, item any) *schema.Resource { var validationErr *schema.ValidationError @@ -245,26 +245,6 @@ func (p *Scheduler) resolveColumn(ctx context.Context, logger zerolog.Logger, ta } } -// func (p *Scheduler) periodicMetricLogger(ctx context.Context, wg *sync.WaitGroup) { -// defer wg.Done() - -// ticker := time.NewTicker(periodicMetricLoggerInterval) -// defer ticker.Stop() - -// for { -// select { -// case <-ctx.Done(): -// return -// case <-ticker.C: -// p.logger.Info(). -// Uint64("total_resources", p.metrics.TotalResourcesAtomic()). -// Uint64("total_errors", p.metrics.TotalErrorsAtomic()). -// Uint64("total_panics", p.metrics.TotalPanicsAtomic()). -// Msg("Sync in progress") -// } -// } -// } - // unparam's suggestion to remove the second parameter is not good advice here. // nolint:unparam func max(a, b uint64) uint64 { diff --git a/scheduler/scheduler_dfs.go b/scheduler/scheduler_dfs.go index 7789dd34e4..f0d465684f 100644 --- a/scheduler/scheduler_dfs.go +++ b/scheduler/scheduler_dfs.go @@ -55,14 +55,6 @@ func (s *Scheduler) syncDfs(ctx context.Context, resolvedResources chan<- *schem s.metrics.initWithClients(table, clients) } - // We start a goroutine that logs the metrics periodically. - // It needs its own waitgroup - // var logWg sync.WaitGroup - // logWg.Add(1) - - // logCtx, logCancel := context.WithCancel(ctx) - // go s.periodicMetricLogger(logCtx, &logWg) - var wg sync.WaitGroup for i, table := range s.tables { table := table @@ -72,9 +64,6 @@ func (s *Scheduler) syncDfs(ctx context.Context, resolvedResources chan<- *schem if err := s.tableSems[0].Acquire(ctx, 1); err != nil { // This means context was cancelled wg.Wait() - // gracefully shut down the logger goroutine - // logCancel() - // logWg.Wait() return } wg.Add(1) @@ -90,10 +79,6 @@ func (s *Scheduler) syncDfs(ctx context.Context, resolvedResources chan<- *schem // Wait for all the worker goroutines to finish wg.Wait() - - // gracefully shut down the logger goroutine - // logCancel() - // logWg.Wait() } func (s *Scheduler) resolveTableDfs(ctx context.Context, table *schema.Table, client schema.ClientMeta, parent *schema.Resource, resolvedResources chan<- *schema.Resource, depth int) { @@ -140,7 +125,7 @@ func (s *Scheduler) resolveTableDfs(ctx context.Context, table *schema.Table, cl // we don't need any waitgroups here because we are waiting for the channel to close if parent == nil { // Log only for root tables and relations only after resolving is done, otherwise we spam per object instead of per table. logger.Info().Uint64("resources", tableMetrics.Resources).Uint64("errors", tableMetrics.Errors).Msg("table sync finished") - // s.logTablesMetrics(table.Relations, client) + s.logTablesMetrics(table.Relations, client) } } diff --git a/scheduler/scheduler_round_robin.go b/scheduler/scheduler_round_robin.go index 104e8f4514..43bd337862 100644 --- a/scheduler/scheduler_round_robin.go +++ b/scheduler/scheduler_round_robin.go @@ -39,14 +39,6 @@ func (s *Scheduler) syncRoundRobin(ctx context.Context, resolvedResources chan<- s.metrics.initWithClients(table, clients) } - // We start a goroutine that logs the metrics periodically. - // It needs its own waitgroup - // var logWg sync.WaitGroup - // logWg.Add(1) - - // logCtx, logCancel := context.WithCancel(ctx) - // go p.periodicMetricLogger(logCtx, &logWg) - tableClients := roundRobinInterleave(s.tables, preInitialisedClients) var wg sync.WaitGroup @@ -56,9 +48,6 @@ func (s *Scheduler) syncRoundRobin(ctx context.Context, resolvedResources chan<- if err := s.tableSems[0].Acquire(ctx, 1); err != nil { // This means context was cancelled wg.Wait() - // gracefully shut down the logger goroutine - // logCancel() - // logWg.Wait() return } wg.Add(1) @@ -75,10 +64,6 @@ func (s *Scheduler) syncRoundRobin(ctx context.Context, resolvedResources chan<- // Wait for all the worker goroutines to finish wg.Wait() - - // gracefully shut down the logger goroutine - // logCancel() - // logWg.Wait() } // interleave table-clients so that we get: diff --git a/transformers/tables.go b/transformers/tables.go new file mode 100644 index 0000000000..94532c6ca3 --- /dev/null +++ b/transformers/tables.go @@ -0,0 +1,58 @@ +package transformers + +import ( + "context" + "fmt" + + "github.com/cloudquery/plugin-sdk/v4/schema" +) + +// Set parent links on relational tables +func setParents(tables schema.Tables, parent *schema.Table) { + for _, table := range tables { + table.Parent = parent + setParents(table.Relations, table) + } +} + +// Add internal columns +func AddInternalColumns(tables []*schema.Table) error { + for _, table := range tables { + if c := table.Column("_cq_id"); c != nil { + return fmt.Errorf("table %s already has column _cq_id", table.Name) + } + cqID := schema.CqIDColumn + if len(table.PrimaryKeys()) == 0 { + cqID.PrimaryKey = true + } + cqSourceName := schema.CqSourceNameColumn + cqSyncTime := schema.CqSyncTimeColumn + cqSourceName.Resolver = func(_ context.Context, _ schema.ClientMeta, resource *schema.Resource, c schema.Column) error { + return resource.Set(c.Name, p.sourceName) + } + cqSyncTime.Resolver = func(_ context.Context, _ schema.ClientMeta, resource *schema.Resource, c schema.Column) error { + return resource.Set(c.Name, p.syncTime) + } + + table.Columns = append([]schema.Column{cqSourceName, cqSyncTime, cqID, schema.CqParentIDColumn}, table.Columns...) + if err := AddInternalColumns(table.Relations); err != nil { + return err + } + } + return nil +} + +// Apply transformations to tables +func TransformTables(tables schema.Tables) error { + for _, table := range tables { + if table.Transform != nil { + if err := table.Transform(table); err != nil { + return fmt.Errorf("failed to transform table %s: %w", table.Name, err) + } + } + if err := TransformTables(table.Relations); err != nil { + return err + } + } + return nil +} \ No newline at end of file diff --git a/plugin/managed_writer.go b/writers/batch.go similarity index 51% rename from plugin/managed_writer.go rename to writers/batch.go index 6af7fd0004..186643aaf0 100644 --- a/plugin/managed_writer.go +++ b/writers/batch.go @@ -1,4 +1,4 @@ -package plugin +package writers import ( "context" @@ -10,8 +10,57 @@ import ( "github.com/apache/arrow/go/v13/arrow/util" "github.com/cloudquery/plugin-sdk/v4/internal/pk" "github.com/cloudquery/plugin-sdk/v4/schema" + "github.com/rs/zerolog" ) +const ( + defaultBatchTimeoutSeconds = 20 + defaultBatchSize = 10000 + defaultBatchSizeBytes = 5 * 1024 * 1024 // 5 MiB +) + +type BatchWriterClient interface { + WriteTableBatch(ctx context.Context, table *schema.Table, resources []arrow.Record) error +} + +type BatchWriter struct { + tables schema.Tables + client BatchWriterClient + workers map[string]*worker + workersLock *sync.Mutex + + logger zerolog.Logger + batchTimeout time.Duration + batchSize int + batchSizeBytes int +} + +type Option func(*BatchWriter) + +func WithLogger(logger zerolog.Logger) Option { + return func(p *BatchWriter) { + p.logger = logger + } +} + +func WithBatchTimeout(timeout time.Duration) Option { + return func(p *BatchWriter) { + p.batchTimeout = timeout + } +} + +func WithBatchSize(size int) Option { + return func(p *BatchWriter) { + p.batchSize = size + } +} + +func WithBatchSizeBytes(size int) Option { + return func(p *BatchWriter) { + p.batchSizeBytes = size + } +} + type worker struct { count int wg *sync.WaitGroup @@ -19,7 +68,24 @@ type worker struct { flush chan chan bool } -func (p *Plugin) worker(ctx context.Context, metrics *Metrics, table *schema.Table, writeMode WriteMode, ch <-chan arrow.Record, flush <-chan chan bool) { +func NewBatchWriter(tables schema.Tables, client BatchWriterClient, opts ...Option) (*BatchWriter, error) { + c := &BatchWriter{ + tables: tables, + client: client, + workers: make(map[string]*worker), + workersLock: &sync.Mutex{}, + logger: zerolog.Nop(), + batchTimeout: defaultBatchTimeoutSeconds * time.Second, + batchSize: defaultBatchSize, + batchSizeBytes: defaultBatchSizeBytes, + } + for _, opt := range opts { + opt(c) + } + return c, nil +} + +func (w *BatchWriter) worker(ctx context.Context, table *schema.Table, ch <-chan arrow.Record, flush <-chan chan bool) { sizeBytes := int64(0) resources := make([]arrow.Record, 0) for { @@ -27,26 +93,26 @@ func (p *Plugin) worker(ctx context.Context, metrics *Metrics, table *schema.Tab case r, ok := <-ch: if !ok { if len(resources) > 0 { - p.flush(ctx, metrics, table, writeMode, resources) + w.flush(ctx, table, resources) } return } if uint64(len(resources)) == 1000 || sizeBytes+util.TotalRecordSize(r) > int64(1000) { - p.flush(ctx, metrics, table, writeMode, resources) + w.flush(ctx, table, resources) resources = make([]arrow.Record, 0) sizeBytes = 0 } resources = append(resources, r) sizeBytes += util.TotalRecordSize(r) - case <-time.After(p.batchTimeout): + case <-time.After(w.batchTimeout): if len(resources) > 0 { - p.flush(ctx, metrics, table, writeMode, resources) + w.flush(ctx, table, resources) resources = make([]arrow.Record, 0) sizeBytes = 0 } case done := <-flush: if len(resources) > 0 { - p.flush(ctx, metrics, table, writeMode, resources) + w.flush(ctx, table, resources) resources = make([]arrow.Record, 0) sizeBytes = 0 } @@ -58,21 +124,18 @@ func (p *Plugin) worker(ctx context.Context, metrics *Metrics, table *schema.Tab } } -func (p *Plugin) flush(ctx context.Context, metrics *Metrics, table *schema.Table, writeMode WriteMode, resources []arrow.Record) { - resources = p.removeDuplicatesByPK(table, resources) +func (w *BatchWriter) flush(ctx context.Context, table *schema.Table, resources []arrow.Record) { + resources = w.removeDuplicatesByPK(table, resources) start := time.Now() batchSize := len(resources) - if err := p.client.WriteTableBatch(ctx, table, writeMode, resources); err != nil { - p.logger.Err(err).Str("table", table.Name).Int("len", batchSize).Dur("duration", time.Since(start)).Msg("failed to write batch") - // we don't return an error as we need to continue until channel is closed otherwise there will be a deadlock - // atomic.AddUint64(&metrics.Errors, uint64(batchSize)) + if err := w.client.WriteTableBatch(ctx, table, resources); err != nil { + w.logger.Err(err).Str("table", table.Name).Int("len", batchSize).Dur("duration", time.Since(start)).Msg("failed to write batch") } else { - p.logger.Info().Str("table", table.Name).Int("len", batchSize).Dur("duration", time.Since(start)).Msg("batch written successfully") - // atomic.AddUint64(&metrics.Writes, uint64(batchSize)) + w.logger.Info().Str("table", table.Name).Int("len", batchSize).Dur("duration", time.Since(start)).Msg("batch written successfully") } } -func (*Plugin) removeDuplicatesByPK(table *schema.Table, resources []arrow.Record) []arrow.Record { +func (*BatchWriter) removeDuplicatesByPK(table *schema.Table, resources []arrow.Record) []arrow.Record { pkIndices := table.PrimaryKeysIndexes() // special case where there's no PK at all if len(pkIndices) == 0 { @@ -99,18 +162,17 @@ func (*Plugin) removeDuplicatesByPK(table *schema.Table, resources []arrow.Recor return res } -func (p *Plugin) writeManagedTableBatch(ctx context.Context, tables schema.Tables, writeMode WriteMode, res <-chan arrow.Record) error { - workers := make(map[string]*worker, len(tables)) - metrics := &Metrics{} +func (w *BatchWriter) Write(ctx context.Context, res <-chan arrow.Record) error { + workers := make(map[string]*worker, len(w.tables)) - p.workersLock.Lock() - for _, table := range tables { + w.workersLock.Lock() + for _, table := range w.tables { table := table - if p.workers[table.Name] == nil { + if w.workers[table.Name] == nil { ch := make(chan arrow.Record) flush := make(chan chan bool) wg := &sync.WaitGroup{} - p.workers[table.Name] = &worker{ + w.workers[table.Name] = &worker{ count: 1, ch: ch, flush: flush, @@ -119,16 +181,16 @@ func (p *Plugin) writeManagedTableBatch(ctx context.Context, tables schema.Table wg.Add(1) go func() { defer wg.Done() - p.worker(ctx, metrics, table, writeMode, ch, flush) + w.worker(ctx, table, ch, flush) }() } else { - p.workers[table.Name].count++ + w.workers[table.Name].count++ } // we save this locally because we don't want to access the map after that so we can // keep the workersLock for as short as possible - workers[table.Name] = p.workers[table.Name] + workers[table.Name] = w.workers[table.Name] } - p.workersLock.Unlock() + w.workersLock.Unlock() for r := range res { tableName, ok := r.Schema().Metadata().GetValue(schema.MetadataTableName) @@ -153,15 +215,15 @@ func (p *Plugin) writeManagedTableBatch(ctx context.Context, tables schema.Table <-flushChannels[tableName] } - p.workersLock.Lock() + w.workersLock.Lock() for tableName := range workers { - p.workers[tableName].count-- - if p.workers[tableName].count == 0 { - close(p.workers[tableName].ch) - p.workers[tableName].wg.Wait() - delete(p.workers, tableName) + w.workers[tableName].count-- + if w.workers[tableName].count == 0 { + close(w.workers[tableName].ch) + w.workers[tableName].wg.Wait() + delete(w.workers, tableName) } } - p.workersLock.Unlock() + w.workersLock.Unlock() return nil } diff --git a/writers/batch_test.go b/writers/batch_test.go new file mode 100644 index 0000000000..0ca94fc1ad --- /dev/null +++ b/writers/batch_test.go @@ -0,0 +1,56 @@ +package writers + +import ( + "context" + "testing" + + "github.com/apache/arrow/go/v13/arrow" + "github.com/apache/arrow/go/v13/arrow/array" + "github.com/apache/arrow/go/v13/arrow/memory" + "github.com/cloudquery/plugin-sdk/v4/schema" +) + +type testBatchClient struct { +} + +func (c *testBatchClient) WriteTableBatch(ctx context.Context, table *schema.Table, resources []arrow.Record) error { + return nil +} + +func TestBatchWriter(t *testing.T) { + ctx := context.Background() + tables := schema.Tables{ + { + Name: "table1", + Columns: []schema.Column{ + { + Name: "id", + Type: arrow.PrimitiveTypes.Int64, + }, + }, + }, + { + Name: "table2", + Columns: []schema.Column{ + { + Name: "id", + Type: arrow.PrimitiveTypes.Int64, + }, + }, + }, + } + + wr, err := NewBatchWriter(tables, &testBatchClient{}) + if err != nil { + t.Fatal(err) + } + ch := make(chan arrow.Record, 1) + + bldr := array.NewRecordBuilder(memory.DefaultAllocator, tables[0].ToArrowSchema()) + bldr.Field(0).(*array.Int64Builder).Append(1) + ch <- bldr.NewRecord() + close(ch) + if err := wr.Write(ctx, ch); err != nil { + t.Fatal(err) + } +} From 6ed42c07c7353b139954e28c571ae095037de358 Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Mon, 5 Jun 2023 23:21:18 +0300 Subject: [PATCH 063/125] wip --- plugin/plugin.go | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/plugin/plugin.go b/plugin/plugin.go index 452d650196..d52fcdf2ae 100644 --- a/plugin/plugin.go +++ b/plugin/plugin.go @@ -7,10 +7,8 @@ import ( "time" "github.com/apache/arrow/go/v13/arrow" - "github.com/cloudquery/plugin-sdk/v4/caser" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/rs/zerolog" - "golang.org/x/sync/semaphore" ) type NewClientFunc func(context.Context, zerolog.Logger, any) (Client, error) @@ -59,14 +57,8 @@ type Plugin struct { newClient NewClientFunc // Logger to call, this logger is passed to the serve.Serve Client, if not defined Serve will create one instead. logger zerolog.Logger - // resourceSem is a semaphore that limits the number of concurrent resources being fetched - resourceSem *semaphore.Weighted - // tableSem is a semaphore that limits the number of concurrent tables being fetched - tableSems []*semaphore.Weighted // maxDepth is the max depth of tables maxDepth uint64 - // caser - caser *caser.Caser // mu is a mutex that limits the number of concurrent init/syncs (can only be one at a time) mu sync.Mutex // client is the initialized session client @@ -78,9 +70,6 @@ type Plugin struct { internalColumns bool // titleTransformer allows the plugin to control how table names get turned into titles for generated documentation titleTransformer func(*schema.Table) string - syncTime time.Time - sourceName string - deterministicCQId bool } const ( @@ -108,7 +97,6 @@ func NewPlugin(name string, version string, newClient NewClientFunc, options ... name: name, version: version, internalColumns: true, - caser: caser.New(), titleTransformer: DefaultTitleTransformer, newClient: newClient, } From ade262961fdff4e724b23deb09c8ae82219ea1ef Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Tue, 6 Jun 2023 01:20:31 +0300 Subject: [PATCH 064/125] more wip --- docs/docs.go | 137 ++++++++++ {plugin => docs}/docs_test.go | 10 +- docs/json.go | 62 +++++ docs/markdown.go | 94 +++++++ .../templates/all_tables.md.go.tpl | 0 .../templates/all_tables_entry.md.go.tpl | 0 {plugin => docs}/templates/table.md.go.tpl | 0 .../TestGeneratePluginDocs-JSON-__tables.json | 0 .../TestGeneratePluginDocs-Markdown-README.md | 0 ...tePluginDocs-Markdown-incremental_table.md | 0 ...Docs-Markdown-relation_relation_table_a.md | 0 ...Docs-Markdown-relation_relation_table_b.md | 0 ...eratePluginDocs-Markdown-relation_table.md | 0 ...tGeneratePluginDocs-Markdown-test_table.md | 0 helpers/internal_columns.go | 2 +- internal/memdb/memdb.go | 1 - internal/servers/plugin/v3/plugin.go | 37 --- plugin/docs.go | 246 ------------------ plugin/options.go | 30 +-- plugin/plugin.go | 2 +- plugin/plugin_reader.go | 1 - .../benchmark_test.go.backup | 0 serve/docs.go | 47 ++++ serve/docs_test.go | 1 + serve/plugin.go | 30 --- transformers/tables.go | 2 +- 26 files changed, 349 insertions(+), 353 deletions(-) create mode 100644 docs/docs.go rename {plugin => docs}/docs_test.go (92%) create mode 100644 docs/json.go create mode 100644 docs/markdown.go rename {plugin => docs}/templates/all_tables.md.go.tpl (100%) rename {plugin => docs}/templates/all_tables_entry.md.go.tpl (100%) rename {plugin => docs}/templates/table.md.go.tpl (100%) rename {plugin => docs}/testdata/TestGeneratePluginDocs-JSON-__tables.json (100%) rename {plugin => docs}/testdata/TestGeneratePluginDocs-Markdown-README.md (100%) rename {plugin => docs}/testdata/TestGeneratePluginDocs-Markdown-incremental_table.md (100%) rename {plugin => docs}/testdata/TestGeneratePluginDocs-Markdown-relation_relation_table_a.md (100%) rename {plugin => docs}/testdata/TestGeneratePluginDocs-Markdown-relation_relation_table_b.md (100%) rename {plugin => docs}/testdata/TestGeneratePluginDocs-Markdown-relation_table.md (100%) rename {plugin => docs}/testdata/TestGeneratePluginDocs-Markdown-test_table.md (100%) delete mode 100644 plugin/docs.go rename {plugin => scheduler}/benchmark_test.go.backup (100%) create mode 100644 serve/docs.go create mode 100644 serve/docs_test.go diff --git a/docs/docs.go b/docs/docs.go new file mode 100644 index 0000000000..62dba4f67b --- /dev/null +++ b/docs/docs.go @@ -0,0 +1,137 @@ +package docs + +import ( + "embed" + "fmt" + "os" + "regexp" + "sort" + + "github.com/cloudquery/plugin-sdk/v4/caser" + "github.com/cloudquery/plugin-sdk/v4/schema" +) + +//go:embed templates/*.go.tpl +var templatesFS embed.FS + +var reMatchNewlines = regexp.MustCompile(`\n{3,}`) +var reMatchHeaders = regexp.MustCompile(`(#{1,6}.+)\n+`) + +var DefaultTitleExceptions = map[string]string{ + // common abbreviations + "acl": "ACL", + "acls": "ACLs", + "api": "API", + "apis": "APIs", + "ca": "CA", + "cidr": "CIDR", + "cidrs": "CIDRs", + "db": "DB", + "dbs": "DBs", + "dhcp": "DHCP", + "iam": "IAM", + "iot": "IOT", + "ip": "IP", + "ips": "IPs", + "ipv4": "IPv4", + "ipv6": "IPv6", + "mfa": "MFA", + "ml": "ML", + "oauth": "OAuth", + "vpc": "VPC", + "vpcs": "VPCs", + "vpn": "VPN", + "vpns": "VPNs", + "waf": "WAF", + "wafs": "WAFs", + + // cloud providers + "aws": "AWS", + "gcp": "GCP", +} + +type Format int + +const ( + FormatMarkdown Format = iota + FormatJSON +) + +func (r Format) String() string { + return [...]string{"markdown", "json"}[r] +} + +func FormatFromString(s string) (Format, error) { + switch s { + case "markdown": + return FormatMarkdown, nil + case "json": + return FormatJSON, nil + default: + return FormatMarkdown, fmt.Errorf("unknown format %s", s) + } +} + +type Generator struct { + tables schema.Tables + titleTransformer func(*schema.Table) string + pluginName string +} + +func DefaultTitleTransformer(table *schema.Table) string { + if table.Title != "" { + return table.Title + } + csr := caser.New(caser.WithCustomExceptions(DefaultTitleExceptions)) + return csr.ToTitle(table.Name) +} + +func sortTables(tables schema.Tables) { + sort.SliceStable(tables, func(i, j int) bool { + return tables[i].Name < tables[j].Name + }) + + for _, table := range tables { + sortTables(table.Relations) + } +} + +// NewGenerator creates a new generator for the given tables. +// The tables are sorted by name. pluginName is optional and is used in markdown only +func NewGenerator(pluginName string, tables schema.Tables) *Generator { + sortedTables := make(schema.Tables, 0, len(tables)) + for _, t := range tables { + sortedTables = append(sortedTables, t.Copy(nil)) + } + sortTables(sortedTables) + + return &Generator{ + tables: sortedTables, + titleTransformer: DefaultTitleTransformer, + pluginName: pluginName, + } +} + +func (g *Generator) Generate(dir string, format Format) error { + if err := os.MkdirAll(dir, os.ModePerm); err != nil { + return err + } + + switch format { + case FormatMarkdown: + return g.renderTablesAsMarkdown(dir) + case FormatJSON: + return g.renderTablesAsJSON(dir) + default: + return fmt.Errorf("unsupported format: %v", format) + } +} + +// setDestinationManagedCqColumns overwrites or adds the CQ columns that are managed by the destination plugins (_cq_sync_time, _cq_source_name). +// func setDestinationManagedCqColumns(tables []*schema.Table) { +// for _, table := range tables { +// table.OverwriteOrAddColumn(&schema.CqSyncTimeColumn) +// table.OverwriteOrAddColumn(&schema.CqSourceNameColumn) +// setDestinationManagedCqColumns(table.Relations) +// } +// } diff --git a/plugin/docs_test.go b/docs/docs_test.go similarity index 92% rename from plugin/docs_test.go rename to docs/docs_test.go index 878e006e88..22d4001719 100644 --- a/plugin/docs_test.go +++ b/docs/docs_test.go @@ -1,6 +1,6 @@ //go:build !windows -package plugin +package docs import ( "os" @@ -9,7 +9,6 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/bradleyjkemp/cupaloy/v2" - pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/cloudquery/plugin-sdk/v4/types" "github.com/stretchr/testify/require" @@ -121,14 +120,13 @@ var testTables = []*schema.Table{ } func TestGeneratePluginDocs(t *testing.T) { - p := NewPlugin("test", "v1.0.0", newTestExecutionClient, WithStaticTables(testTables)) - + g := NewGenerator("test", testTables) cup := cupaloy.New(cupaloy.SnapshotSubdirectory("testdata")) t.Run("Markdown", func(t *testing.T) { tmpdir := t.TempDir() - err := p.GeneratePluginDocs(tmpdir, pbPlugin.GenDocs_FORMAT_MARKDOWN) + err := g.Generate(tmpdir, FormatMarkdown) if err != nil { t.Fatalf("unexpected error calling GeneratePluginDocs: %v", err) } @@ -147,7 +145,7 @@ func TestGeneratePluginDocs(t *testing.T) { t.Run("JSON", func(t *testing.T) { tmpdir := t.TempDir() - err := p.GeneratePluginDocs(tmpdir, pbPlugin.GenDocs_FORMAT_JSON) + err := g.Generate(tmpdir, FormatJSON) if err != nil { t.Fatalf("unexpected error calling GeneratePluginDocs: %v", err) } diff --git a/docs/json.go b/docs/json.go new file mode 100644 index 0000000000..8972a86b8c --- /dev/null +++ b/docs/json.go @@ -0,0 +1,62 @@ +package docs + +import ( + "bytes" + "encoding/json" + "os" + "path/filepath" + + "github.com/cloudquery/plugin-sdk/v4/schema" +) + +type jsonTable struct { + Name string `json:"name"` + Title string `json:"title"` + Description string `json:"description"` + Columns []jsonColumn `json:"columns"` + Relations []jsonTable `json:"relations"` +} + +type jsonColumn struct { + Name string `json:"name"` + Type string `json:"type"` + IsPrimaryKey bool `json:"is_primary_key,omitempty"` + IsIncrementalKey bool `json:"is_incremental_key,omitempty"` +} + +func (g *Generator) renderTablesAsJSON(dir string) error { + jsonTables := g.jsonifyTables(g.tables) + buffer := &bytes.Buffer{} + m := json.NewEncoder(buffer) + m.SetIndent("", " ") + m.SetEscapeHTML(false) + err := m.Encode(jsonTables) + if err != nil { + return err + } + outputPath := filepath.Join(dir, "__tables.json") + return os.WriteFile(outputPath, buffer.Bytes(), 0644) +} + +func (g *Generator) jsonifyTables(tables schema.Tables) []jsonTable { + jsonTables := make([]jsonTable, len(tables)) + for i, table := range tables { + jsonColumns := make([]jsonColumn, len(table.Columns)) + for c, col := range table.Columns { + jsonColumns[c] = jsonColumn{ + Name: col.Name, + Type: col.Type.String(), + IsPrimaryKey: col.PrimaryKey, + IsIncrementalKey: col.IncrementalKey, + } + } + jsonTables[i] = jsonTable{ + Name: table.Name, + Title: g.titleTransformer(table), + Description: table.Description, + Columns: jsonColumns, + Relations: g.jsonifyTables(table.Relations), + } + } + return jsonTables +} diff --git a/docs/markdown.go b/docs/markdown.go new file mode 100644 index 0000000000..6f8fe9dcaa --- /dev/null +++ b/docs/markdown.go @@ -0,0 +1,94 @@ +package docs + +import ( + "bytes" + "fmt" + "os" + "path/filepath" + "text/template" + + "github.com/cloudquery/plugin-sdk/v4/schema" +) + +type templateData struct { + PluginName string + Tables schema.Tables +} + +func (g *Generator) renderTablesAsMarkdown(dir string) error { + for _, table := range g.tables { + if err := g.renderAllTables(dir, table); err != nil { + return err + } + } + t, err := template.New("all_tables.md.go.tpl").Funcs(template.FuncMap{ + "indentToDepth": indentToDepth, + }).ParseFS(templatesFS, "templates/all_tables*.md.go.tpl") + if err != nil { + return fmt.Errorf("failed to parse template for README.md: %v", err) + } + + var b bytes.Buffer + if err := t.Execute(&b, templateData{PluginName: g.pluginName, Tables: g.tables}); err != nil { + return fmt.Errorf("failed to execute template: %v", err) + } + content := formatMarkdown(b.String()) + outputPath := filepath.Join(dir, "README.md") + f, err := os.Create(outputPath) + if err != nil { + return fmt.Errorf("failed to create file %v: %v", outputPath, err) + } + f.WriteString(content) + return nil +} + +func (g *Generator) renderAllTables(dir string, t *schema.Table) error { + if err := g.renderTable(dir, t); err != nil { + return err + } + for _, r := range t.Relations { + if err := g.renderAllTables(dir, r); err != nil { + return err + } + } + return nil +} + +func (g *Generator) renderTable(dir string, table *schema.Table) error { + t := template.New("").Funcs(map[string]any{ + "title": g.titleTransformer, + }) + t, err := t.New("table.md.go.tpl").ParseFS(templatesFS, "templates/table.md.go.tpl") + if err != nil { + return fmt.Errorf("failed to parse template: %v", err) + } + + outputPath := filepath.Join(dir, fmt.Sprintf("%s.md", table.Name)) + + var b bytes.Buffer + if err := t.Execute(&b, table); err != nil { + return fmt.Errorf("failed to execute template: %v", err) + } + content := formatMarkdown(b.String()) + f, err := os.Create(outputPath) + if err != nil { + return fmt.Errorf("failed to create file %v: %v", outputPath, err) + } + f.WriteString(content) + return f.Close() +} + +func formatMarkdown(s string) string { + s = reMatchNewlines.ReplaceAllString(s, "\n\n") + return reMatchHeaders.ReplaceAllString(s, `$1`+"\n\n") +} + +func indentToDepth(table *schema.Table) string { + s := "" + t := table + for t.Parent != nil { + s += " " + t = t.Parent + } + return s +} diff --git a/plugin/templates/all_tables.md.go.tpl b/docs/templates/all_tables.md.go.tpl similarity index 100% rename from plugin/templates/all_tables.md.go.tpl rename to docs/templates/all_tables.md.go.tpl diff --git a/plugin/templates/all_tables_entry.md.go.tpl b/docs/templates/all_tables_entry.md.go.tpl similarity index 100% rename from plugin/templates/all_tables_entry.md.go.tpl rename to docs/templates/all_tables_entry.md.go.tpl diff --git a/plugin/templates/table.md.go.tpl b/docs/templates/table.md.go.tpl similarity index 100% rename from plugin/templates/table.md.go.tpl rename to docs/templates/table.md.go.tpl diff --git a/plugin/testdata/TestGeneratePluginDocs-JSON-__tables.json b/docs/testdata/TestGeneratePluginDocs-JSON-__tables.json similarity index 100% rename from plugin/testdata/TestGeneratePluginDocs-JSON-__tables.json rename to docs/testdata/TestGeneratePluginDocs-JSON-__tables.json diff --git a/plugin/testdata/TestGeneratePluginDocs-Markdown-README.md b/docs/testdata/TestGeneratePluginDocs-Markdown-README.md similarity index 100% rename from plugin/testdata/TestGeneratePluginDocs-Markdown-README.md rename to docs/testdata/TestGeneratePluginDocs-Markdown-README.md diff --git a/plugin/testdata/TestGeneratePluginDocs-Markdown-incremental_table.md b/docs/testdata/TestGeneratePluginDocs-Markdown-incremental_table.md similarity index 100% rename from plugin/testdata/TestGeneratePluginDocs-Markdown-incremental_table.md rename to docs/testdata/TestGeneratePluginDocs-Markdown-incremental_table.md diff --git a/plugin/testdata/TestGeneratePluginDocs-Markdown-relation_relation_table_a.md b/docs/testdata/TestGeneratePluginDocs-Markdown-relation_relation_table_a.md similarity index 100% rename from plugin/testdata/TestGeneratePluginDocs-Markdown-relation_relation_table_a.md rename to docs/testdata/TestGeneratePluginDocs-Markdown-relation_relation_table_a.md diff --git a/plugin/testdata/TestGeneratePluginDocs-Markdown-relation_relation_table_b.md b/docs/testdata/TestGeneratePluginDocs-Markdown-relation_relation_table_b.md similarity index 100% rename from plugin/testdata/TestGeneratePluginDocs-Markdown-relation_relation_table_b.md rename to docs/testdata/TestGeneratePluginDocs-Markdown-relation_relation_table_b.md diff --git a/plugin/testdata/TestGeneratePluginDocs-Markdown-relation_table.md b/docs/testdata/TestGeneratePluginDocs-Markdown-relation_table.md similarity index 100% rename from plugin/testdata/TestGeneratePluginDocs-Markdown-relation_table.md rename to docs/testdata/TestGeneratePluginDocs-Markdown-relation_table.md diff --git a/plugin/testdata/TestGeneratePluginDocs-Markdown-test_table.md b/docs/testdata/TestGeneratePluginDocs-Markdown-test_table.md similarity index 100% rename from plugin/testdata/TestGeneratePluginDocs-Markdown-test_table.md rename to docs/testdata/TestGeneratePluginDocs-Markdown-test_table.md diff --git a/helpers/internal_columns.go b/helpers/internal_columns.go index 12668d607f..345b806ac4 100644 --- a/helpers/internal_columns.go +++ b/helpers/internal_columns.go @@ -1 +1 @@ -package helpers \ No newline at end of file +package helpers diff --git a/internal/memdb/memdb.go b/internal/memdb/memdb.go index 13ad7f74b9..a23316939b 100644 --- a/internal/memdb/memdb.go +++ b/internal/memdb/memdb.go @@ -155,7 +155,6 @@ func (c *client) Write(ctx context.Context, options plugin.WriteOptions, resourc return nil } - func (c *client) Close(context.Context) error { c.memoryDB = nil return nil diff --git a/internal/servers/plugin/v3/plugin.go b/internal/servers/plugin/v3/plugin.go index 00e2543d99..92c7c27cc1 100644 --- a/internal/servers/plugin/v3/plugin.go +++ b/internal/servers/plugin/v3/plugin.go @@ -6,8 +6,6 @@ import ( "errors" "fmt" "io" - "os" - "path/filepath" "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/ipc" @@ -44,7 +42,6 @@ func (s *Server) GetTables(context.Context, *pb.GetTables_Request) (*pb.GetTable }, nil } - func (s *Server) GetName(context.Context, *pb.GetName_Request) (*pb.GetName_Response, error) { return &pb.GetName_Response{ Name: s.Plugin.Name(), @@ -248,40 +245,6 @@ func (s *Server) Write(msg pb.Plugin_WriteServer) error { } } -func (s *Server) GenDocs(req *pb.GenDocs_Request, srv pb.Plugin_GenDocsServer) error { - tmpDir, err := os.MkdirTemp("", "cloudquery-docs") - if err != nil { - return fmt.Errorf("failed to create tmp dir: %w", err) - } - defer os.RemoveAll(tmpDir) - err = s.Plugin.GeneratePluginDocs(tmpDir, req.Format) - if err != nil { - return fmt.Errorf("failed to generate docs: %w", err) - } - - // list files in tmpDir - files, err := os.ReadDir(tmpDir) - if err != nil { - return fmt.Errorf("failed to read tmp dir: %w", err) - } - for _, f := range files { - if f.IsDir() { - continue - } - content, err := os.ReadFile(filepath.Join(tmpDir, f.Name())) - if err != nil { - return fmt.Errorf("failed to read file: %w", err) - } - if err := srv.Send(&pb.GenDocs_Response{ - Filename: f.Name(), - Content: content, - }); err != nil { - return fmt.Errorf("failed to send file: %w", err) - } - } - return nil -} - func checkMessageSize(msg proto.Message, record arrow.Record) error { size := proto.Size(msg) // log error to Sentry if row exceeds half of the max size diff --git a/plugin/docs.go b/plugin/docs.go deleted file mode 100644 index 6e4dccf581..0000000000 --- a/plugin/docs.go +++ /dev/null @@ -1,246 +0,0 @@ -package plugin - -import ( - "bytes" - "context" - "embed" - "encoding/json" - "fmt" - "os" - "path/filepath" - "regexp" - "sort" - "text/template" - - pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" - "github.com/cloudquery/plugin-sdk/v4/caser" - "github.com/cloudquery/plugin-sdk/v4/schema" -) - -//go:embed templates/*.go.tpl -var templatesFS embed.FS - -var reMatchNewlines = regexp.MustCompile(`\n{3,}`) -var reMatchHeaders = regexp.MustCompile(`(#{1,6}.+)\n+`) - -var DefaultTitleExceptions = map[string]string{ - // common abbreviations - "acl": "ACL", - "acls": "ACLs", - "api": "API", - "apis": "APIs", - "ca": "CA", - "cidr": "CIDR", - "cidrs": "CIDRs", - "db": "DB", - "dbs": "DBs", - "dhcp": "DHCP", - "iam": "IAM", - "iot": "IOT", - "ip": "IP", - "ips": "IPs", - "ipv4": "IPv4", - "ipv6": "IPv6", - "mfa": "MFA", - "ml": "ML", - "oauth": "OAuth", - "vpc": "VPC", - "vpcs": "VPCs", - "vpn": "VPN", - "vpns": "VPNs", - "waf": "WAF", - "wafs": "WAFs", - - // cloud providers - "aws": "AWS", - "gcp": "GCP", -} - -func DefaultTitleTransformer(table *schema.Table) string { - if table.Title != "" { - return table.Title - } - csr := caser.New(caser.WithCustomExceptions(DefaultTitleExceptions)) - return csr.ToTitle(table.Name) -} - -func sortTables(tables schema.Tables) { - sort.SliceStable(tables, func(i, j int) bool { - return tables[i].Name < tables[j].Name - }) - - for _, table := range tables { - sortTables(table.Relations) - } -} - -type templateData struct { - PluginName string - Tables schema.Tables -} - -// GeneratePluginDocs creates table documentation for the source plugin based on its list of tables -func (p *Plugin) GeneratePluginDocs(dir string, format pbPlugin.GenDocs_FORMAT) error { - if err := os.MkdirAll(dir, os.ModePerm); err != nil { - return err - } - tables, err := p.Tables(context.Background()) - if err != nil { - return err - } - setDestinationManagedCqColumns(tables) - - sortedTables := make(schema.Tables, 0, len(tables)) - for _, t := range tables { - sortedTables = append(sortedTables, t.Copy(nil)) - } - sortTables(sortedTables) - - switch format { - case pbPlugin.GenDocs_FORMAT_MARKDOWN: - return p.renderTablesAsMarkdown(dir, p.name, sortedTables) - case pbPlugin.GenDocs_FORMAT_JSON: - return p.renderTablesAsJSON(dir, sortedTables) - default: - return fmt.Errorf("unsupported format: %v", format) - } -} - -// setDestinationManagedCqColumns overwrites or adds the CQ columns that are managed by the destination plugins (_cq_sync_time, _cq_source_name). -func setDestinationManagedCqColumns(tables []*schema.Table) { - for _, table := range tables { - table.OverwriteOrAddColumn(&schema.CqSyncTimeColumn) - table.OverwriteOrAddColumn(&schema.CqSourceNameColumn) - setDestinationManagedCqColumns(table.Relations) - } -} - -type jsonTable struct { - Name string `json:"name"` - Title string `json:"title"` - Description string `json:"description"` - Columns []jsonColumn `json:"columns"` - Relations []jsonTable `json:"relations"` -} - -type jsonColumn struct { - Name string `json:"name"` - Type string `json:"type"` - IsPrimaryKey bool `json:"is_primary_key,omitempty"` - IsIncrementalKey bool `json:"is_incremental_key,omitempty"` -} - -func (p *Plugin) renderTablesAsJSON(dir string, tables schema.Tables) error { - jsonTables := p.jsonifyTables(tables) - buffer := &bytes.Buffer{} - m := json.NewEncoder(buffer) - m.SetIndent("", " ") - m.SetEscapeHTML(false) - err := m.Encode(jsonTables) - if err != nil { - return err - } - outputPath := filepath.Join(dir, "__tables.json") - return os.WriteFile(outputPath, buffer.Bytes(), 0644) -} - -func (p *Plugin) jsonifyTables(tables schema.Tables) []jsonTable { - jsonTables := make([]jsonTable, len(tables)) - for i, table := range tables { - jsonColumns := make([]jsonColumn, len(table.Columns)) - for c, col := range table.Columns { - jsonColumns[c] = jsonColumn{ - Name: col.Name, - Type: col.Type.String(), - IsPrimaryKey: col.PrimaryKey, - IsIncrementalKey: col.IncrementalKey, - } - } - jsonTables[i] = jsonTable{ - Name: table.Name, - Title: p.titleTransformer(table), - Description: table.Description, - Columns: jsonColumns, - Relations: p.jsonifyTables(table.Relations), - } - } - return jsonTables -} - -func (p *Plugin) renderTablesAsMarkdown(dir string, pluginName string, tables schema.Tables) error { - for _, table := range tables { - if err := p.renderAllTables(table, dir); err != nil { - return err - } - } - t, err := template.New("all_tables.md.go.tpl").Funcs(template.FuncMap{ - "indentToDepth": indentToDepth, - }).ParseFS(templatesFS, "templates/all_tables*.md.go.tpl") - if err != nil { - return fmt.Errorf("failed to parse template for README.md: %v", err) - } - - var b bytes.Buffer - if err := t.Execute(&b, templateData{PluginName: pluginName, Tables: tables}); err != nil { - return fmt.Errorf("failed to execute template: %v", err) - } - content := formatMarkdown(b.String()) - outputPath := filepath.Join(dir, "README.md") - f, err := os.Create(outputPath) - if err != nil { - return fmt.Errorf("failed to create file %v: %v", outputPath, err) - } - f.WriteString(content) - return nil -} - -func (p *Plugin) renderAllTables(t *schema.Table, dir string) error { - if err := p.renderTable(t, dir); err != nil { - return err - } - for _, r := range t.Relations { - if err := p.renderAllTables(r, dir); err != nil { - return err - } - } - return nil -} - -func (p *Plugin) renderTable(table *schema.Table, dir string) error { - t := template.New("").Funcs(map[string]any{ - "title": p.titleTransformer, - }) - t, err := t.New("table.md.go.tpl").ParseFS(templatesFS, "templates/table.md.go.tpl") - if err != nil { - return fmt.Errorf("failed to parse template: %v", err) - } - - outputPath := filepath.Join(dir, fmt.Sprintf("%s.md", table.Name)) - - var b bytes.Buffer - if err := t.Execute(&b, table); err != nil { - return fmt.Errorf("failed to execute template: %v", err) - } - content := formatMarkdown(b.String()) - f, err := os.Create(outputPath) - if err != nil { - return fmt.Errorf("failed to create file %v: %v", outputPath, err) - } - f.WriteString(content) - return f.Close() -} - -func formatMarkdown(s string) string { - s = reMatchNewlines.ReplaceAllString(s, "\n\n") - return reMatchHeaders.ReplaceAllString(s, `$1`+"\n\n") -} - -func indentToDepth(table *schema.Table) string { - s := "" - t := table - for t.Parent != nil { - s += " " - t = t.Parent - } - return s -} diff --git a/plugin/options.go b/plugin/options.go index aaa8687a51..ebdf1329f4 100644 --- a/plugin/options.go +++ b/plugin/options.go @@ -1,8 +1,6 @@ package plugin import ( - "fmt" - "github.com/cloudquery/plugin-sdk/v4/schema" ) @@ -21,31 +19,6 @@ func (m MigrateMode) String() string { return migrateModeStrings[m] } -type Registry int - -const ( - RegistryGithub Registry = iota - RegistryLocal - RegistryGrpc -) - -func (r Registry) String() string { - return [...]string{"github", "local", "grpc"}[r] -} - -func RegistryFromString(s string) (Registry, error) { - switch s { - case "github": - return RegistryGithub, nil - case "local": - return RegistryLocal, nil - case "grpc": - return RegistryGrpc, nil - default: - return RegistryGithub, fmt.Errorf("unknown registry %s", s) - } -} - type WriteMode int const ( @@ -64,7 +37,6 @@ func (m WriteMode) String() string { type Option func(*Plugin) - // WithNoInternalColumns won't add internal columns (_cq_id, _cq_parent_cq_id) to the plugin tables func WithNoInternalColumns() Option { return func(p *Plugin) { @@ -78,4 +50,4 @@ func WithTitleTransformer(t func(*schema.Table) string) Option { return func(p *Plugin) { p.titleTransformer = t } -} \ No newline at end of file +} diff --git a/plugin/plugin.go b/plugin/plugin.go index d52fcdf2ae..05dfcff7ee 100644 --- a/plugin/plugin.go +++ b/plugin/plugin.go @@ -69,7 +69,7 @@ type Plugin struct { // useful for sources such as PostgreSQL and other databases internalColumns bool // titleTransformer allows the plugin to control how table names get turned into titles for generated documentation - titleTransformer func(*schema.Table) string + titleTransformer func(*schema.Table) string } const ( diff --git a/plugin/plugin_reader.go b/plugin/plugin_reader.go index da76a3ed97..57f9f52bea 100644 --- a/plugin/plugin_reader.go +++ b/plugin/plugin_reader.go @@ -48,7 +48,6 @@ func NewReadOnlyPlugin(name string, version string, newClient NewReadOnlyClientF return NewPlugin(name, version, newClientWrapper, options...) } - func (p *Plugin) syncAll(ctx context.Context, options SyncOptions) ([]arrow.Record, error) { var err error ch := make(chan arrow.Record) diff --git a/plugin/benchmark_test.go.backup b/scheduler/benchmark_test.go.backup similarity index 100% rename from plugin/benchmark_test.go.backup rename to scheduler/benchmark_test.go.backup diff --git a/serve/docs.go b/serve/docs.go new file mode 100644 index 0000000000..442b6308f1 --- /dev/null +++ b/serve/docs.go @@ -0,0 +1,47 @@ +package serve + +import ( + "fmt" + "strings" + + "github.com/cloudquery/plugin-sdk/v4/docs" + "github.com/spf13/cobra" +) + +const ( + pluginDocShort = "Generate documentation for tables" + pluginDocLong = `Generate documentation for tables + +If format is markdown, a destination directory will be created (if necessary) containing markdown files. +Example: +doc ./output + +If format is JSON, a destination directory will be created (if necessary) with a single json file called __tables.json. +Example: +doc --format json . +` +) + +func (s *PluginServe) newCmdPluginDoc() *cobra.Command { + format := newEnum([]string{"json", "markdown"}, "markdown") + cmd := &cobra.Command{ + Use: "doc ", + Short: pluginDocShort, + Long: pluginDocLong, + Args: cobra.ExactArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + tables, err := s.plugin.Tables(cmd.Context()) + if err != nil { + return err + } + g := docs.NewGenerator(s.plugin.Name(), tables) + f := docs.FormatMarkdown + if format.Value == "json" { + f = docs.FormatJSON + } + return g.Generate(args[0], f) + }, + } + cmd.Flags().Var(format, "format", fmt.Sprintf("output format. one of: %s", strings.Join(format.Allowed, ","))) + return cmd +} diff --git a/serve/docs_test.go b/serve/docs_test.go new file mode 100644 index 0000000000..9b65230168 --- /dev/null +++ b/serve/docs_test.go @@ -0,0 +1 @@ +package serve diff --git a/serve/plugin.go b/serve/plugin.go index f64d0ba1ec..9c55830987 100644 --- a/serve/plugin.go +++ b/serve/plugin.go @@ -245,36 +245,6 @@ func (s *PluginServe) newCmdPluginServe() *cobra.Command { return cmd } -const ( - pluginDocShort = "Generate documentation for tables" - pluginDocLong = `Generate documentation for tables - -If format is markdown, a destination directory will be created (if necessary) containing markdown files. -Example: -doc ./output - -If format is JSON, a destination directory will be created (if necessary) with a single json file called __tables.json. -Example: -doc --format json . -` -) - -func (s *PluginServe) newCmdPluginDoc() *cobra.Command { - format := newEnum([]string{"json", "markdown"}, "markdown") - cmd := &cobra.Command{ - Use: "doc ", - Short: pluginDocShort, - Long: pluginDocLong, - Args: cobra.ExactArgs(1), - RunE: func(cmd *cobra.Command, args []string) error { - pbFormat := pbv3.GenDocs_FORMAT(pbv3.GenDocs_FORMAT_value[format.Value]) - return s.plugin.GeneratePluginDocs(args[0], pbFormat) - }, - } - cmd.Flags().Var(format, "format", fmt.Sprintf("output format. one of: %s", strings.Join(format.Allowed, ","))) - return cmd -} - func (s *PluginServe) newCmdPluginRoot() *cobra.Command { cmd := &cobra.Command{ Use: fmt.Sprintf("%s ", s.plugin.Name()), diff --git a/transformers/tables.go b/transformers/tables.go index 94532c6ca3..99b563e2e5 100644 --- a/transformers/tables.go +++ b/transformers/tables.go @@ -55,4 +55,4 @@ func TransformTables(tables schema.Tables) error { } } return nil -} \ No newline at end of file +} From 540a99edbcd5dbad9baf6ca5ec01da6f84728dca Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Sat, 10 Jun 2023 17:42:24 +0300 Subject: [PATCH 065/125] more wip --- cover | 199 +++++++ docs/{docs.go => generator.go} | 0 docs/{docs_test.go => generator_test.go} | 0 .../TestGeneratePluginDocs-JSON-__tables.json | 100 ---- ...tePluginDocs-Markdown-incremental_table.md | 4 - ...Docs-Markdown-relation_relation_table_a.md | 6 +- ...Docs-Markdown-relation_relation_table_b.md | 6 +- ...eratePluginDocs-Markdown-relation_table.md | 6 +- ...tGeneratePluginDocs-Markdown-test_table.md | 4 - go.mod | 7 +- go.sum | 7 +- plugin/messages.go | 84 +++ plugin/options.go | 19 - plugin/plugin.go | 49 +- plugin/plugin_reader.go | 22 +- plugin/plugin_writer.go | 56 +- plugin/testing_overwrite_deletestale.go | 168 ------ plugin/testing_sync.go | 18 +- plugin/testing_upsert.go | 69 +++ plugin/testing_write.go | 243 ++------- plugin/testing_write_append.go | 95 ---- plugin/testing_write_delete.go | 84 +++ plugin/testing_write_insert.go | 68 +++ plugin/testing_write_migrate.go | 177 +++---- plugin/testing_write_overwrite.go | 115 ----- plugin/testing_write_upsert.go | 69 +++ scheduler/benchmark_test.go | 1 + scheduler/metrics.go | 1 + .../plugin_managed_source_test.go.backup | 484 ------------------ scheduler/scheduler.go | 26 +- scheduler/scheduler_round_robin_test.go | 7 - scheduler/scheduler_test.go | 278 ++++++++++ schema/resource.go | 5 + serve/docs_test.go | 19 + serve/plugin_test.go | 93 +--- ...ate_v3_test.go => state_v3_test.go.backup} | 0 transformers/tables.go | 28 - 37 files changed, 1056 insertions(+), 1561 deletions(-) create mode 100644 cover rename docs/{docs.go => generator.go} (100%) rename docs/{docs_test.go => generator_test.go} (100%) create mode 100644 plugin/messages.go delete mode 100644 plugin/testing_overwrite_deletestale.go create mode 100644 plugin/testing_upsert.go delete mode 100644 plugin/testing_write_append.go create mode 100644 plugin/testing_write_delete.go create mode 100644 plugin/testing_write_insert.go delete mode 100644 plugin/testing_write_overwrite.go create mode 100644 plugin/testing_write_upsert.go create mode 100644 scheduler/benchmark_test.go delete mode 100644 scheduler/plugin_managed_source_test.go.backup create mode 100644 scheduler/scheduler_test.go rename serve/{state_v3_test.go => state_v3_test.go.backup} (100%) diff --git a/cover b/cover new file mode 100644 index 0000000000..5fb4e3be13 --- /dev/null +++ b/cover @@ -0,0 +1,199 @@ +mode: set +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:45.37,47.30 2 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:53.2,53.24 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:47.30,48.12 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:51.3,51.41 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:48.12,50.4 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:56.44,58.2 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:62.47,63.28 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:63.28,65.3 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:68.59,69.28 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:69.28,71.3 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:74.49,75.28 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:75.28,77.3 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:80.63,81.28 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:81.28,83.3 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:104.94,113.27 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:116.2,116.11 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:113.27,115.3 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:119.78,121.12 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:132.2,132.34 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:139.2,139.12 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:121.12,123.21 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:124.21,125.29 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:126.28,127.36 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:128.11,129.57 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:132.34,138.3 5 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:142.86,144.31 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:144.31,148.3 3 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:151.157,160.15 9 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:171.2,171.38 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:185.2,185.34 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:189.2,189.39 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:201.2,202.17 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:160.15,161.35 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:161.35,165.47 4 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:165.47,168.5 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:171.38,172.74 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:172.74,175.38 3 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:181.4,181.14 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:175.38,176.48 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:176.48,179.6 2 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:185.34,187.3 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:189.39,190.75 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:190.75,193.38 3 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:193.38,194.48 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:194.48,197.6 2 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:205.183,208.15 3 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:221.2,221.23 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:208.15,209.35 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:209.35,213.47 4 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:213.47,217.5 3 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:221.23,222.62 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:222.62,225.38 3 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:225.38,226.48 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:226.48,230.6 3 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:233.8,236.15 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:236.15,238.18 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:238.18,241.39 3 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:241.39,242.49 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:242.49,246.7 3 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:253.44,255.22 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:258.2,258.31 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:264.2,264.14 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:255.22,257.3 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:258.31,260.23 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:260.23,262.4 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:269.30,270.11 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:273.2,273.10 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:270.11,272.3 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:17.93,24.42 4 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:29.2,34.33 3 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:58.2,59.33 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:81.2,81.11 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:24.42,28.3 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:34.33,36.29 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:40.3,41.29 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:52.3,55.44 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:36.29,38.4 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:41.29,42.41 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:42.41,44.5 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:44.10,45.48 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:49.5,49.110 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:45.48,48.6 2 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:59.33,62.34 3 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:62.34,64.57 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:69.4,70.14 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:64.57,68.5 2 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:70.14,76.5 3 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:84.184,89.19 4 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:92.2,95.12 3 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:121.2,121.21 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:126.2,126.19 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:89.19,91.3 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:95.12,96.16 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:108.3,108.66 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:96.16,97.36 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:106.4,106.14 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:97.36,99.48 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:103.5,104.46 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:99.48,102.6 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:108.66,111.38 3 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:117.4,117.10 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:111.38,112.48 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:112.48,115.6 2 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:121.21,123.3 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:126.19,129.3 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:132.203,134.30 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:137.2,138.12 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:194.2,195.38 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:213.2,213.11 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:134.30,136.3 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:138.12,142.33 4 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:191.3,191.12 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:142.33,144.56 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:150.4,151.14 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:144.56,149.5 3 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:151.14,156.32 4 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:160.5,160.79 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:174.5,174.55 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:188.5,188.38 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:156.32,158.6 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:160.79,163.86 3 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:171.6,172.12 2 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:163.86,166.50 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:166.50,169.8 2 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:174.55,177.86 3 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:185.6,186.12 2 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:177.86,180.50 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:180.50,183.8 2 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:195.38,198.53 3 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:198.53,200.61 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:205.4,206.14 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:200.61,204.5 2 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:206.14,210.5 3 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_round_robin.go:16.100,21.42 4 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_round_robin.go:26.2,31.33 3 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_round_robin.go:42.2,45.34 3 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_round_robin.go:66.2,66.11 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_round_robin.go:21.42,25.3 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_round_robin.go:31.33,33.29 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_round_robin.go:36.3,39.44 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_round_robin.go:33.29,35.4 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_round_robin.go:45.34,48.56 3 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_round_robin.go:53.3,54.13 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_round_robin.go:48.56,52.4 2 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_round_robin.go:54.13,62.4 3 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_round_robin.go:71.108,74.6 3 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_round_robin.go:87.2,87.21 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_round_robin.go:74.6,76.32 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_round_robin.go:82.3,83.16 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_round_robin.go:76.32,77.41 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_round_robin.go:77.41,80.5 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_round_robin.go:83.16,84.9 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:23.68,25.2 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:28.46,29.48 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:42.2,42.52 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:55.2,55.13 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:29.48,30.42 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:30.42,31.46 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:34.4,34.54 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:37.4,37.54 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:31.46,33.5 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:34.54,36.5 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:37.54,39.5 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:42.52,43.42 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:43.42,44.42 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:47.4,47.50 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:50.4,50.50 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:44.42,46.5 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:47.50,49.5 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:50.50,52.5 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:58.85,60.33 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:63.2,63.43 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:60.33,62.3 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:63.43,65.3 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:68.40,70.46 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:75.2,75.14 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:70.46,71.41 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:71.41,73.4 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:78.46,80.46 2 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:85.2,85.14 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:80.46,81.41 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:81.41,83.4 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:88.40,90.46 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:95.2,95.14 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:90.46,91.41 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:91.41,93.4 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:98.46,100.46 2 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:105.2,105.14 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:100.46,101.41 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:101.41,103.4 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:108.43,110.46 2 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:115.2,115.14 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:110.46,111.41 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:111.41,113.4 1 1 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:118.49,120.46 2 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:125.2,125.14 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:120.46,121.41 1 0 +github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:121.41,123.4 1 0 diff --git a/docs/docs.go b/docs/generator.go similarity index 100% rename from docs/docs.go rename to docs/generator.go diff --git a/docs/docs_test.go b/docs/generator_test.go similarity index 100% rename from docs/docs_test.go rename to docs/generator_test.go diff --git a/docs/testdata/TestGeneratePluginDocs-JSON-__tables.json b/docs/testdata/TestGeneratePluginDocs-JSON-__tables.json index 7a8280833e..2623746cb5 100644 --- a/docs/testdata/TestGeneratePluginDocs-JSON-__tables.json +++ b/docs/testdata/TestGeneratePluginDocs-JSON-__tables.json @@ -4,22 +4,6 @@ "title": "Incremental Table", "description": "Description for incremental table", "columns": [ - { - "name": "_cq_source_name", - "type": "utf8" - }, - { - "name": "_cq_sync_time", - "type": "timestamp[us, tz=UTC]" - }, - { - "name": "_cq_id", - "type": "uuid" - }, - { - "name": "_cq_parent_id", - "type": "uuid" - }, { "name": "int_col", "type": "int64" @@ -43,22 +27,6 @@ "title": "Test Table", "description": "Description for test table", "columns": [ - { - "name": "_cq_source_name", - "type": "utf8" - }, - { - "name": "_cq_sync_time", - "type": "timestamp[us, tz=UTC]" - }, - { - "name": "_cq_id", - "type": "uuid" - }, - { - "name": "_cq_parent_id", - "type": "uuid" - }, { "name": "int_col", "type": "int64" @@ -96,23 +64,6 @@ "title": "Relation Table", "description": "Description for relational table", "columns": [ - { - "name": "_cq_source_name", - "type": "utf8" - }, - { - "name": "_cq_sync_time", - "type": "timestamp[us, tz=UTC]" - }, - { - "name": "_cq_id", - "type": "uuid", - "is_primary_key": true - }, - { - "name": "_cq_parent_id", - "type": "uuid" - }, { "name": "string_col", "type": "utf8" @@ -124,23 +75,6 @@ "title": "Relation Relation Table A", "description": "Description for relational table's relation", "columns": [ - { - "name": "_cq_source_name", - "type": "utf8" - }, - { - "name": "_cq_sync_time", - "type": "timestamp[us, tz=UTC]" - }, - { - "name": "_cq_id", - "type": "uuid", - "is_primary_key": true - }, - { - "name": "_cq_parent_id", - "type": "uuid" - }, { "name": "string_col", "type": "utf8" @@ -153,23 +87,6 @@ "title": "Relation Relation Table B", "description": "Description for relational table's relation", "columns": [ - { - "name": "_cq_source_name", - "type": "utf8" - }, - { - "name": "_cq_sync_time", - "type": "timestamp[us, tz=UTC]" - }, - { - "name": "_cq_id", - "type": "uuid", - "is_primary_key": true - }, - { - "name": "_cq_parent_id", - "type": "uuid" - }, { "name": "string_col", "type": "utf8" @@ -184,23 +101,6 @@ "title": "Relation Table2", "description": "Description for second relational table", "columns": [ - { - "name": "_cq_source_name", - "type": "utf8" - }, - { - "name": "_cq_sync_time", - "type": "timestamp[us, tz=UTC]" - }, - { - "name": "_cq_id", - "type": "uuid", - "is_primary_key": true - }, - { - "name": "_cq_parent_id", - "type": "uuid" - }, { "name": "string_col", "type": "utf8" diff --git a/docs/testdata/TestGeneratePluginDocs-Markdown-incremental_table.md b/docs/testdata/TestGeneratePluginDocs-Markdown-incremental_table.md index 67ca4b8539..4148e838eb 100644 --- a/docs/testdata/TestGeneratePluginDocs-Markdown-incremental_table.md +++ b/docs/testdata/TestGeneratePluginDocs-Markdown-incremental_table.md @@ -11,10 +11,6 @@ It supports incremental syncs based on the (**id_col**, **id_col2**) columns. | Name | Type | | ------------- | ------------- | -|_cq_source_name|utf8| -|_cq_sync_time|timestamp[us, tz=UTC]| -|_cq_id|uuid| -|_cq_parent_id|uuid| |int_col|int64| |id_col (PK) (Incremental Key)|int64| |id_col2 (Incremental Key)|int64| diff --git a/docs/testdata/TestGeneratePluginDocs-Markdown-relation_relation_table_a.md b/docs/testdata/TestGeneratePluginDocs-Markdown-relation_relation_table_a.md index 038791b13e..1c0b8b63c8 100644 --- a/docs/testdata/TestGeneratePluginDocs-Markdown-relation_relation_table_a.md +++ b/docs/testdata/TestGeneratePluginDocs-Markdown-relation_relation_table_a.md @@ -4,7 +4,7 @@ This table shows data for Relation Relation Table A. Description for relational table's relation -The primary key for this table is **_cq_id**. +The composite primary key for this table is (). ## Relations @@ -14,8 +14,4 @@ This table depends on [relation_table](relation_table.md). | Name | Type | | ------------- | ------------- | -|_cq_source_name|utf8| -|_cq_sync_time|timestamp[us, tz=UTC]| -|_cq_id (PK)|uuid| -|_cq_parent_id|uuid| |string_col|utf8| diff --git a/docs/testdata/TestGeneratePluginDocs-Markdown-relation_relation_table_b.md b/docs/testdata/TestGeneratePluginDocs-Markdown-relation_relation_table_b.md index 432f6533f8..77dce363dc 100644 --- a/docs/testdata/TestGeneratePluginDocs-Markdown-relation_relation_table_b.md +++ b/docs/testdata/TestGeneratePluginDocs-Markdown-relation_relation_table_b.md @@ -4,7 +4,7 @@ This table shows data for Relation Relation Table B. Description for relational table's relation -The primary key for this table is **_cq_id**. +The composite primary key for this table is (). ## Relations @@ -14,8 +14,4 @@ This table depends on [relation_table](relation_table.md). | Name | Type | | ------------- | ------------- | -|_cq_source_name|utf8| -|_cq_sync_time|timestamp[us, tz=UTC]| -|_cq_id (PK)|uuid| -|_cq_parent_id|uuid| |string_col|utf8| diff --git a/docs/testdata/TestGeneratePluginDocs-Markdown-relation_table.md b/docs/testdata/TestGeneratePluginDocs-Markdown-relation_table.md index 7db8baff7e..96b152a8fe 100644 --- a/docs/testdata/TestGeneratePluginDocs-Markdown-relation_table.md +++ b/docs/testdata/TestGeneratePluginDocs-Markdown-relation_table.md @@ -4,7 +4,7 @@ This table shows data for Relation Table. Description for relational table -The primary key for this table is **_cq_id**. +The composite primary key for this table is (). ## Relations @@ -18,8 +18,4 @@ The following tables depend on relation_table: | Name | Type | | ------------- | ------------- | -|_cq_source_name|utf8| -|_cq_sync_time|timestamp[us, tz=UTC]| -|_cq_id (PK)|uuid| -|_cq_parent_id|uuid| |string_col|utf8| diff --git a/docs/testdata/TestGeneratePluginDocs-Markdown-test_table.md b/docs/testdata/TestGeneratePluginDocs-Markdown-test_table.md index f0c91578a5..089a0b4b3e 100644 --- a/docs/testdata/TestGeneratePluginDocs-Markdown-test_table.md +++ b/docs/testdata/TestGeneratePluginDocs-Markdown-test_table.md @@ -16,10 +16,6 @@ The following tables depend on test_table: | Name | Type | | ------------- | ------------- | -|_cq_source_name|utf8| -|_cq_sync_time|timestamp[us, tz=UTC]| -|_cq_id|uuid| -|_cq_parent_id|uuid| |int_col|int64| |id_col (PK)|int64| |id_col2 (PK)|int64| diff --git a/go.mod b/go.mod index 6210df53c0..420f12c1ca 100644 --- a/go.mod +++ b/go.mod @@ -25,7 +25,11 @@ require ( google.golang.org/protobuf v1.30.0 ) +<<<<<<< HEAD replace github.com/apache/arrow/go/v13 => github.com/cloudquery/arrow/go/v13 v13.0.0-20230623001532-8366a2241e66 +======= +replace github.com/apache/arrow/go/v13 => github.com/cloudquery/arrow/go/v13 v13.0.0-20230610001216-0f7bd3beda2c +>>>>>>> 7e5547e (more wip) replace github.com/cloudquery/plugin-pb-go => ../plugin-pb-go @@ -36,6 +40,7 @@ require ( github.com/davecgh/go-spew v1.1.1 // indirect github.com/ghodss/yaml v1.0.0 // indirect github.com/golang/protobuf v1.5.3 // indirect + github.com/golang/snappy v0.0.4 // indirect github.com/google/flatbuffers v23.1.21+incompatible // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/klauspost/compress v1.16.0 // indirect @@ -46,7 +51,7 @@ require ( github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 // indirect github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3 // indirect github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect - github.com/pierrec/lz4/v4 v4.1.15 // indirect + github.com/pierrec/lz4/v4 v4.1.17 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect github.com/rivo/uniseg v0.2.0 // indirect github.com/schollz/progressbar/v3 v3.13.1 // indirect diff --git a/go.sum b/go.sum index 0ebdc98a2f..6d7acabc3e 100644 --- a/go.sum +++ b/go.sum @@ -47,8 +47,8 @@ github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWR github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= -github.com/cloudquery/arrow/go/v13 v13.0.0-20230509053643-898a79b1d3c8 h1:CmgLSEGQNLHpUQ5cU4L4aF7cuJZRnc1toIIWqC1gmPg= -github.com/cloudquery/arrow/go/v13 v13.0.0-20230509053643-898a79b1d3c8/go.mod h1:/XatdE3kDIBqZKhZ7OBUHwP2jaASDFZHqF4puOWM8po= +github.com/cloudquery/arrow/go/v13 v13.0.0-20230610001216-0f7bd3beda2c h1:nQSB4v0QxCW5XDLvVBcaNrsJ+J/esMBoFYjymllxM1E= +github.com/cloudquery/arrow/go/v13 v13.0.0-20230610001216-0f7bd3beda2c/go.mod h1:W69eByFNO0ZR30q1/7Sr9d83zcVZmF2MiP3fFYAWJOc= github.com/cloudquery/plugin-sdk/v2 v2.7.0 h1:hRXsdEiaOxJtsn/wZMFQC9/jPfU1MeMK3KF+gPGqm7U= github.com/cloudquery/plugin-sdk/v2 v2.7.0/go.mod h1:pAX6ojIW99b/Vg4CkhnsGkRIzNaVEceYMR+Bdit73ug= github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= @@ -153,8 +153,8 @@ github.com/klauspost/compress v1.16.0 h1:iULayQNOReoYUe+1qtKOqw9CwJv3aNQu8ivo7lw github.com/klauspost/compress v1.16.0/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE= github.com/klauspost/cpuid/v2 v2.2.3 h1:sxCkb+qR91z4vsqw4vGGZlDgPz3G7gjaLyK3V8y70BU= github.com/klauspost/cpuid/v2 v2.2.3/go.mod h1:RVVoqg1df56z8g3pUjL/3lE5UfnlrJX8tyFgg4nqhuY= +github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= -github.com/kr/pretty v0.3.0 h1:WgNl7dwNpEZ6jJ9k1snq4pZsg7DOEN8hP9Xw0Tsjwk0= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= @@ -190,7 +190,6 @@ github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1: github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY= github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= -github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8= github.com/rs/xid v1.2.1/go.mod h1:+uKXf+4Djp6Md1KODXJxgGQPKngRmWyn10oCKFzNHOQ= github.com/rs/xid v1.4.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg= github.com/rs/zerolog v1.19.0/go.mod h1:IzD0RJ65iWH0w97OQQebJEvTZYvsCUm9WVLWBQrJRjo= diff --git a/plugin/messages.go b/plugin/messages.go new file mode 100644 index 0000000000..fa975cc97c --- /dev/null +++ b/plugin/messages.go @@ -0,0 +1,84 @@ +package plugin + +import ( + "github.com/apache/arrow/go/v13/arrow" + "github.com/cloudquery/plugin-sdk/v4/schema" +) + +type MessageType int + +const ( + // Create table + MessageTypeCreate MessageType = iota + // Insert record + MessageTypeInsert + // Insert or update record + MessageTypeUpsert + // Delete rows + MessageTypeDelete +) + +type MessageCreateTable struct { + Table *schema.Table + Force bool +} + +func (*MessageCreateTable) Type() MessageType { + return MessageTypeCreate +} + +type MessageInsert struct { + Record arrow.Record + Columns []string + Upsert bool +} + +func (*MessageInsert) Type() MessageType { + return MessageTypeInsert +} + +type Operator int + +const ( + OperatorEqual Operator = iota + OperatorNotEqual + OperatorGreaterThan + OperatorGreaterThanOrEqual + OperatorLessThan + OperatorLessThanOrEqual +) + +type WhereClause struct { + Column string + Operator Operator + Value string +} + +type MessageDelete struct { + Record arrow.Record + // currently delete only supports and where clause as we don't support + // full AST parsing + WhereClauses []WhereClause +} + +func (*MessageDelete) Type() MessageType { + return MessageTypeDelete +} + +type Message interface { + Type() MessageType +} + +type Messages []Message + +func (m Messages) InsertItems() int64 { + items := int64(0) + for _, msg := range m { + switch msg.Type() { + case MessageTypeInsert: + msgInsert := msg.(*MessageInsert) + items += msgInsert.Record.NumRows() + } + } + return items +} diff --git a/plugin/options.go b/plugin/options.go index ebdf1329f4..09a771d0b6 100644 --- a/plugin/options.go +++ b/plugin/options.go @@ -1,9 +1,5 @@ package plugin -import ( - "github.com/cloudquery/plugin-sdk/v4/schema" -) - type MigrateMode int const ( @@ -36,18 +32,3 @@ func (m WriteMode) String() string { } type Option func(*Plugin) - -// WithNoInternalColumns won't add internal columns (_cq_id, _cq_parent_cq_id) to the plugin tables -func WithNoInternalColumns() Option { - return func(p *Plugin) { - p.internalColumns = false - } -} - -// WithTitleTransformer allows the plugin to control how table names get turned into titles for the -// generated documentation. -func WithTitleTransformer(t func(*schema.Table) string) Option { - return func(p *Plugin) { - p.titleTransformer = t - } -} diff --git a/plugin/plugin.go b/plugin/plugin.go index 05dfcff7ee..9900d16e26 100644 --- a/plugin/plugin.go +++ b/plugin/plugin.go @@ -4,9 +4,7 @@ import ( "context" "fmt" "sync" - "time" - "github.com/apache/arrow/go/v13/arrow" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/rs/zerolog" ) @@ -15,30 +13,20 @@ type NewClientFunc func(context.Context, zerolog.Logger, any) (Client, error) type Client interface { Tables(ctx context.Context) (schema.Tables, error) - Sync(ctx context.Context, options SyncOptions, res chan<- arrow.Record) error - Migrate(ctx context.Context, tables schema.Tables, migrateMode MigrateOptions) error - Write(ctx context.Context, options WriteOptions, res <-chan arrow.Record) error - DeleteStale(ctx context.Context, tables schema.Tables, sourceName string, syncTime time.Time) error + Sync(ctx context.Context, options SyncOptions, res chan<- Message) error + Write(ctx context.Context, options WriteOptions, res <-chan Message) error Close(ctx context.Context) error } type UnimplementedWriter struct{} -func (UnimplementedWriter) Migrate(ctx context.Context, tables schema.Tables, migrateMode MigrateMode) error { - return fmt.Errorf("not implemented") -} - -func (UnimplementedWriter) Write(ctx context.Context, tables schema.Tables, writeMode WriteMode, res <-chan arrow.Record) error { - return fmt.Errorf("not implemented") -} - -func (UnimplementedWriter) DeleteStale(ctx context.Context, tables schema.Tables, sourceName string, syncTime time.Time) error { +func (UnimplementedWriter) Write(ctx context.Context, options WriteOptions, res <-chan Message) error { return fmt.Errorf("not implemented") } type UnimplementedSync struct{} -func (UnimplementedSync) Sync(ctx context.Context, options SyncOptions, res chan<- arrow.Record) error { +func (UnimplementedSync) Sync(ctx context.Context, options SyncOptions, res chan<- Message) error { return fmt.Errorf("not implemented") } @@ -57,8 +45,6 @@ type Plugin struct { newClient NewClientFunc // Logger to call, this logger is passed to the serve.Serve Client, if not defined Serve will create one instead. logger zerolog.Logger - // maxDepth is the max depth of tables - maxDepth uint64 // mu is a mutex that limits the number of concurrent init/syncs (can only be one at a time) mu sync.Mutex // client is the initialized session client @@ -68,8 +54,6 @@ type Plugin struct { // NoInternalColumns if set to true will not add internal columns to tables such as _cq_id and _cq_parent_id // useful for sources such as PostgreSQL and other databases internalColumns bool - // titleTransformer allows the plugin to control how table names get turned into titles for generated documentation - titleTransformer func(*schema.Table) string } const ( @@ -94,11 +78,10 @@ func maxDepth(tables schema.Tables) uint64 { // Depending on the options, it can be write only plugin, read only plugin or both. func NewPlugin(name string, version string, newClient NewClientFunc, options ...Option) *Plugin { p := Plugin{ - name: name, - version: version, - internalColumns: true, - titleTransformer: DefaultTitleTransformer, - newClient: newClient, + name: name, + version: version, + internalColumns: true, + newClient: newClient, } for _, opt := range options { opt(&p) @@ -125,22 +108,6 @@ func (p *Plugin) Tables(ctx context.Context) (schema.Tables, error) { if err != nil { return nil, fmt.Errorf("failed to get tables: %w", err) } - setParents(tables, nil) - if err := transformTables(tables); err != nil { - return nil, err - } - if p.internalColumns { - if err := p.addInternalColumns(tables); err != nil { - return nil, err - } - } - p.maxDepth = maxDepth(tables) - if p.maxDepth > maxAllowedDepth { - return nil, fmt.Errorf("max depth of tables is %d, max allowed is %d", p.maxDepth, maxAllowedDepth) - } - if err := p.validate(tables); err != nil { - return nil, err - } return tables, nil } diff --git a/plugin/plugin_reader.go b/plugin/plugin_reader.go index 57f9f52bea..89963d7eb4 100644 --- a/plugin/plugin_reader.go +++ b/plugin/plugin_reader.go @@ -3,9 +3,8 @@ package plugin import ( "context" "fmt" - "time" - "github.com/apache/arrow/go/v13/arrow" + "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/cloudquery/plugin-sdk/v4/state" "github.com/rs/zerolog" ) @@ -15,15 +14,12 @@ type SyncOptions struct { SkipTables []string Concurrency int64 DeterministicCQID bool - // SyncTime if specified then this will be add to every table as _sync_time column - SyncTime time.Time - // If spceified then this will be added to every table as _source_name column - SourceName string - StateBackend state.Client + StateBackend state.Client } type ReadOnlyClient interface { - Sync(ctx context.Context, options SyncOptions, res chan<- arrow.Record) error + Tables(ctx context.Context) (schema.Tables, error) + Sync(ctx context.Context, options SyncOptions, res chan<- Message) error Close(ctx context.Context) error } @@ -48,15 +44,15 @@ func NewReadOnlyPlugin(name string, version string, newClient NewReadOnlyClientF return NewPlugin(name, version, newClientWrapper, options...) } -func (p *Plugin) syncAll(ctx context.Context, options SyncOptions) ([]arrow.Record, error) { +func (p *Plugin) syncAll(ctx context.Context, options SyncOptions) (Messages, error) { var err error - ch := make(chan arrow.Record) + ch := make(chan Message) go func() { defer close(ch) err = p.Sync(ctx, options, ch) }() // nolint:prealloc - var resources []arrow.Record + var resources []Message for resource := range ch { resources = append(resources, resource) } @@ -64,12 +60,12 @@ func (p *Plugin) syncAll(ctx context.Context, options SyncOptions) ([]arrow.Reco } // Sync is syncing data from the requested tables in spec to the given channel -func (p *Plugin) Sync(ctx context.Context, options SyncOptions, res chan<- arrow.Record) error { +func (p *Plugin) Sync(ctx context.Context, options SyncOptions, res chan<- Message) error { if !p.mu.TryLock() { return fmt.Errorf("plugin already in use") } defer p.mu.Unlock() - p.syncTime = options.SyncTime + // p.syncTime = options.SyncTime // startTime := time.Now() if err := p.client.Sync(ctx, options, res); err != nil { diff --git a/plugin/plugin_writer.go b/plugin/plugin_writer.go index d0420182be..009ac23ad7 100644 --- a/plugin/plugin_writer.go +++ b/plugin/plugin_writer.go @@ -2,76 +2,30 @@ package plugin import ( "context" - "fmt" - "time" - - "github.com/apache/arrow/go/v13/arrow" - "github.com/cloudquery/plugin-sdk/v4/schema" ) type WriteOptions struct { - // WriteMode is the mode to write to the database - WriteMode WriteMode - // Predefined tables are available if tables are known at the start of the write - Tables schema.Tables -} - -type MigrateOptions struct { - // MigrateMode is the mode to migrate the database - MigrateMode MigrateMode -} - -func (p *Plugin) Migrate(ctx context.Context, tables schema.Tables, options MigrateOptions) error { - if p.client == nil { - return fmt.Errorf("plugin is not initialized") - } - return p.client.Migrate(ctx, tables, options) } // this function is currently used mostly for testing so it's not a public api -func (p *Plugin) writeOne(ctx context.Context, options WriteOptions, resource arrow.Record) error { - resources := []arrow.Record{resource} +func (p *Plugin) writeOne(ctx context.Context, options WriteOptions, resource Message) error { + resources := []Message{resource} return p.writeAll(ctx, options, resources) } // this function is currently used mostly for testing so it's not a public api -func (p *Plugin) writeAll(ctx context.Context, options WriteOptions, resources []arrow.Record) error { - ch := make(chan arrow.Record, len(resources)) +func (p *Plugin) writeAll(ctx context.Context, options WriteOptions, resources []Message) error { + ch := make(chan Message, len(resources)) for _, resource := range resources { ch <- resource } close(ch) - tables := make(schema.Tables, 0) - tableNames := make(map[string]struct{}) - for _, resource := range resources { - sc := resource.Schema() - tableMD := sc.Metadata() - name, found := tableMD.GetValue(schema.MetadataTableName) - if !found { - return fmt.Errorf("missing table name") - } - if _, ok := tableNames[name]; ok { - continue - } - table, err := schema.NewTableFromArrowSchema(resource.Schema()) - if err != nil { - return err - } - tables = append(tables, table) - tableNames[table.Name] = struct{}{} - } - options.Tables = tables return p.Write(ctx, options, ch) } -func (p *Plugin) Write(ctx context.Context, options WriteOptions, res <-chan arrow.Record) error { +func (p *Plugin) Write(ctx context.Context, options WriteOptions, res <-chan Message) error { if err := p.client.Write(ctx, options, res); err != nil { return err } return nil } - -func (p *Plugin) DeleteStale(ctx context.Context, tables schema.Tables, sourceName string, syncTime time.Time) error { - syncTime = syncTime.UTC() - return p.client.DeleteStale(ctx, tables, sourceName, syncTime) -} diff --git a/plugin/testing_overwrite_deletestale.go b/plugin/testing_overwrite_deletestale.go deleted file mode 100644 index 6ac079ff13..0000000000 --- a/plugin/testing_overwrite_deletestale.go +++ /dev/null @@ -1,168 +0,0 @@ -package plugin - -import ( - "context" - "fmt" - "time" - - "github.com/apache/arrow/go/v13/arrow/array" - "github.com/cloudquery/plugin-sdk/v4/schema" - "github.com/cloudquery/plugin-sdk/v4/types" - "github.com/google/uuid" - "github.com/rs/zerolog" -) - -func (*PluginTestSuite) destinationPluginTestWriteOverwriteDeleteStale(ctx context.Context, p *Plugin, logger zerolog.Logger, spec any, testOpts PluginTestSuiteRunnerOptions) error { - writeMode := WriteModeOverwriteDeleteStale - if err := p.Init(ctx, spec); err != nil { - return fmt.Errorf("failed to init plugin: %w", err) - } - tableName := fmt.Sprintf("cq_overwrite_delete_stale_%d", time.Now().Unix()) - table := schema.TestTable(tableName, testOpts.TestSourceOptions) - incTable := schema.TestTable(tableName+"_incremental", testOpts.TestSourceOptions) - incTable.IsIncremental = true - syncTime := time.Now().UTC().Round(1 * time.Second) - tables := schema.Tables{ - table, - incTable, - } - if err := p.Migrate(ctx, tables, MigrateModeSafe); err != nil { - return fmt.Errorf("failed to migrate tables: %w", err) - } - - sourceName := "testOverwriteSource" + uuid.NewString() - - opts := schema.GenTestDataOptions{ - SourceName: sourceName, - SyncTime: syncTime, - MaxRows: 2, - TimePrecision: testOpts.TimePrecision, - } - resources := schema.GenTestData(table, opts) - incResources := schema.GenTestData(incTable, opts) - allResources := resources - allResources = append(allResources, incResources...) - if err := p.writeAll(ctx, sourceName, syncTime, writeMode, allResources); err != nil { - return fmt.Errorf("failed to write all: %w", err) - } - sortRecordsBySyncTime(table, resources) - - resourcesRead, err := p.syncAll(ctx, SyncOptions{ - Tables: []string{tableName}, - SyncTime: syncTime, - SourceName: sourceName, - }) - if err != nil { - return fmt.Errorf("failed to read all: %w", err) - } - sortRecordsBySyncTime(table, resourcesRead) - - if len(resourcesRead) != 2 { - return fmt.Errorf("expected 2 resources, got %d", len(resourcesRead)) - } - testOpts.AllowNull.replaceNullsByEmpty(resources) - if testOpts.IgnoreNullsInLists { - stripNullsFromLists(resources) - } - if !recordApproxEqual(resources[0], resourcesRead[0]) { - diff := RecordDiff(resources[0], resourcesRead[0]) - return fmt.Errorf("expected first resource to be equal. diff: %s", diff) - } - - if !recordApproxEqual(resources[1], resourcesRead[1]) { - diff := RecordDiff(resources[1], resourcesRead[1]) - return fmt.Errorf("expected second resource to be equal. diff: %s", diff) - } - - // read from incremental table - resourcesRead, err = p.syncAll(ctx, SyncOptions{ - Tables: []string{tableName}, - SyncTime: syncTime, - SourceName: sourceName, - }) - if err != nil { - return fmt.Errorf("failed to read all: %w", err) - } - if len(resourcesRead) != 2 { - return fmt.Errorf("expected 2 resources in incremental table, got %d", len(resourcesRead)) - } - - secondSyncTime := syncTime.Add(time.Second).UTC() - // copy first resource but update the sync time - cqIDInds := resources[0].Schema().FieldIndices(schema.CqIDColumn.Name) - u := resources[0].Column(cqIDInds[0]).(*types.UUIDArray).Value(0) - opts = schema.GenTestDataOptions{ - SourceName: sourceName, - SyncTime: secondSyncTime, - StableUUID: u, - MaxRows: 1, - TimePrecision: testOpts.TimePrecision, - } - updatedResources := schema.GenTestData(table, opts) - updatedIncResources := schema.GenTestData(incTable, opts) - allUpdatedResources := updatedResources - allUpdatedResources = append(allUpdatedResources, updatedIncResources...) - - if err := p.writeAll(ctx, sourceName, secondSyncTime, writeMode, allUpdatedResources); err != nil { - return fmt.Errorf("failed to write all second time: %w", err) - } - - resourcesRead, err = p.syncAll(ctx, SyncOptions{ - Tables: []string{tableName}, - SyncTime: secondSyncTime, - SourceName: sourceName, - }) - if err != nil { - return fmt.Errorf("failed to read all second time: %w", err) - } - sortRecordsBySyncTime(table, resourcesRead) - if len(resourcesRead) != 1 { - return fmt.Errorf("after overwrite expected 1 resource, got %d", len(resourcesRead)) - } - testOpts.AllowNull.replaceNullsByEmpty(resources) - if testOpts.IgnoreNullsInLists { - stripNullsFromLists(resources) - } - if recordApproxEqual(resources[0], resourcesRead[0]) { - diff := RecordDiff(resources[0], resourcesRead[0]) - return fmt.Errorf("after overwrite expected first resource to be different. diff: %s", diff) - } - - resourcesRead, err = p.syncAll(ctx, SyncOptions{ - Tables: []string{tableName}, - SyncTime: syncTime, - SourceName: sourceName, - }) - if err != nil { - return fmt.Errorf("failed to read all second time: %w", err) - } - if len(resourcesRead) != 1 { - return fmt.Errorf("expected 1 resource after delete stale, got %d", len(resourcesRead)) - } - - // we expect the only resource returned to match the updated resource we wrote - testOpts.AllowNull.replaceNullsByEmpty(updatedResources) - if testOpts.IgnoreNullsInLists { - stripNullsFromLists(updatedResources) - } - if !recordApproxEqual(updatedResources[0], resourcesRead[0]) { - diff := RecordDiff(updatedResources[0], resourcesRead[0]) - return fmt.Errorf("after delete stale expected resource to be equal. diff: %s", diff) - } - - // we expect the incremental table to still have 3 resources, because delete-stale should - // not apply there - resourcesRead, err = p.syncAll(ctx, SyncOptions{ - Tables: []string{incTable.Name}, - SyncTime: secondSyncTime, - SourceName: sourceName, - }) - if err != nil { - return fmt.Errorf("failed to read all from incremental table: %w", err) - } - if len(resourcesRead) != 3 { - return fmt.Errorf("expected 3 resources in incremental table after delete-stale, got %d", len(resourcesRead)) - } - - return nil -} diff --git a/plugin/testing_sync.go b/plugin/testing_sync.go index 01a09c98b6..ecd136ca00 100644 --- a/plugin/testing_sync.go +++ b/plugin/testing_sync.go @@ -13,7 +13,7 @@ import ( type Validator func(t *testing.T, plugin *Plugin, resources []arrow.Record) -func TestPluginSync(t *testing.T, plugin *Plugin, sourceName string, spec any, options SyncOptions, opts ...TestPluginOption) { +func TestPluginSync(t *testing.T, plugin *Plugin, spec any, options SyncOptions, opts ...TestPluginOption) { t.Helper() o := &testPluginOptions{ @@ -101,19 +101,13 @@ func validateTable(t *testing.T, table *schema.Table, resources []arrow.Record) func validatePlugin(t *testing.T, plugin *Plugin, resources []arrow.Record) { t.Helper() - tables := extractTables(plugin.staticTables) - for _, table := range tables { - validateTable(t, table, resources) + tables, err := plugin.Tables(context.Background()) + if err != nil { + t.Fatal(err) } -} - -func extractTables(tables schema.Tables) []*schema.Table { - result := make([]*schema.Table, 0) - for _, table := range tables { - result = append(result, table) - result = append(result, extractTables(table.Relations)...) + for _, table := range tables.FlattenTables() { + validateTable(t, table, resources) } - return result } // Validates that every column has at least one non-nil value. diff --git a/plugin/testing_upsert.go b/plugin/testing_upsert.go new file mode 100644 index 0000000000..f6b16f3ae3 --- /dev/null +++ b/plugin/testing_upsert.go @@ -0,0 +1,69 @@ +package plugin + +import ( + "context" + "fmt" + "time" + + "github.com/apache/arrow/go/v13/arrow" + "github.com/apache/arrow/go/v13/arrow/array" + "github.com/apache/arrow/go/v13/arrow/memory" + "github.com/cloudquery/plugin-sdk/v4/schema" +) + +func (s *PluginTestSuite) testUpsert(ctx context.Context) error { + tableName := fmt.Sprintf("cq_test_upsert_%d", time.Now().Unix()) + table := &schema.Table{ + Name: tableName, + Columns: []schema.Column{ + {Name: "name", Type: arrow.BinaryTypes.String, PrimaryKey: true}, + }, + } + if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageCreateTable{ + Table: table, + }); err != nil { + return fmt.Errorf("failed to create table: %w", err) + } + + bldr := array.NewRecordBuilder(memory.DefaultAllocator, table.ToArrowSchema()) + bldr.Field(0).(*array.StringBuilder).Append("foo") + + if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageInsert{ + Record: bldr.NewRecord(), + Upsert: true, + }); err != nil { + return fmt.Errorf("failed to insert record: %w", err) + } + + messages, err := s.plugin.syncAll(ctx, SyncOptions{ + Tables: []string{tableName}, + }) + if err != nil { + return fmt.Errorf("failed to sync: %w", err) + } + totalItems := messages.InsertItems() + if totalItems != 1 { + return fmt.Errorf("expected 1 item, got %d", totalItems) + } + + if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageInsert{ + Record: bldr.NewRecord(), + Upsert: true, + }); err != nil { + return fmt.Errorf("failed to insert record: %w", err) + } + + messages, err = s.plugin.syncAll(ctx, SyncOptions{ + Tables: []string{tableName}, + }) + if err != nil { + return fmt.Errorf("failed to sync: %w", err) + } + + totalItems = messages.InsertItems() + if totalItems != 1 { + return fmt.Errorf("expected 1 item, got %d", totalItems) + } + + return nil +} diff --git a/plugin/testing_write.go b/plugin/testing_write.go index e7e50ef76f..5a358376af 100644 --- a/plugin/testing_write.go +++ b/plugin/testing_write.go @@ -2,21 +2,33 @@ package plugin import ( "context" - "os" "sort" "strings" "testing" - "time" "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/array" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/cloudquery/plugin-sdk/v4/types" - "github.com/rs/zerolog" ) type PluginTestSuite struct { tests PluginTestSuiteTests + + plugin *Plugin + + // AllowNull is a custom func to determine whether a data type may be correctly represented as null. + // Destinations that have problems representing some data types should provide a custom implementation here. + // If this param is empty, the default is to allow all data types to be nullable. + // When the value returned by this func is `true` the comparison is made with the empty value instead of null. + allowNull AllowNullFunc + + // IgnoreNullsInLists allows stripping null values from lists before comparison. + // Destination setups that don't support nulls in lists should set this to true. + ignoreNullsInLists bool + + // genDataOptions define how to generate test data and which data types to skip + genDatOptions schema.TestSourceOptions } // MigrateStrategy defines which tests we should include @@ -29,244 +41,97 @@ type MigrateStrategy struct { } type PluginTestSuiteTests struct { - // SkipOverwrite skips testing for "overwrite" mode. Use if the destination - // plugin doesn't support this feature. - SkipOverwrite bool - - // SkipDeleteStale skips testing "delete-stale" mode. Use if the destination - // plugin doesn't support this feature. - SkipDeleteStale bool + // SkipUpsert skips testing with MessageInsert and Upsert=true. + // Usually when a destination is not supporting primary keys + SkipUpsert bool - // SkipAppend skips testing for "append" mode. Use if the destination - // plugin doesn't support this feature. - SkipAppend bool + // SkipDelete skips testing MessageDelete events. + SkipDelete bool - // SkipSecondAppend skips the second append step in the test. - // This is useful in cases like cloud storage where you can't append to an - // existing object after the file has been closed. - SkipSecondAppend bool + // SkipAppend skips testing MessageInsert and Upsert=false. + SkipInsert bool - // SkipMigrateAppend skips a test for the migrate function where a column is added, - // data is appended, then the column is removed and more data appended, checking that the migrations handle - // this correctly. - SkipMigrateAppend bool - // SkipMigrateAppendForce skips a test for the migrate function where a column is changed in force mode - SkipMigrateAppendForce bool + // SkipMigrate skips testing migration + SkipMigrate bool - // SkipMigrateOverwrite skips a test for the migrate function where a column is added, - // data is appended, then the column is removed and more data overwritten, checking that the migrations handle - // this correctly. - SkipMigrateOverwrite bool - // SkipMigrateOverwriteForce skips a test for the migrate function where a column is changed in force mode - SkipMigrateOverwriteForce bool - - MigrateStrategyOverwrite MigrateStrategy - MigrateStrategyAppend MigrateStrategy -} - -func getTestLogger(t *testing.T) zerolog.Logger { - t.Helper() - zerolog.TimeFieldFormat = zerolog.TimeFormatUnixMs - return zerolog.New(zerolog.NewTestWriter(t)).Output( - zerolog.ConsoleWriter{Out: os.Stderr, TimeFormat: time.StampMicro}, - ).Level(zerolog.TraceLevel).With().Timestamp().Logger() + // MigrateStrategy defines which tests should work with force migration + // and which should pass with safe migration + MigrateStrategy MigrateStrategy } type NewPluginFunc func() *Plugin -type PluginTestSuiteRunnerOptions struct { - // IgnoreNullsInLists allows stripping null values from lists before comparison. - // Destination setups that don't support nulls in lists should set this to true. - IgnoreNullsInLists bool - - // AllowNull is a custom func to determine whether a data type may be correctly represented as null. - // Destinations that have problems representing some data types should provide a custom implementation here. - // If this param is empty, the default is to allow all data types to be nullable. - // When the value returned by this func is `true` the comparison is made with the empty value instead of null. - AllowNull AllowNullFunc - - schema.TestSourceOptions -} - -func WithTestSourceAllowNull(allowNull func(arrow.DataType) bool) func(o *PluginTestSuiteRunnerOptions) { - return func(o *PluginTestSuiteRunnerOptions) { - o.AllowNull = allowNull - } -} - -func WithTestIgnoreNullsInLists() func(o *PluginTestSuiteRunnerOptions) { - return func(o *PluginTestSuiteRunnerOptions) { - o.IgnoreNullsInLists = true - } -} - -func WithTestSourceTimePrecision(precision time.Duration) func(o *PluginTestSuiteRunnerOptions) { - return func(o *PluginTestSuiteRunnerOptions) { - o.TimePrecision = precision - } -} - -func WithTestSourceSkipLists() func(o *PluginTestSuiteRunnerOptions) { - return func(o *PluginTestSuiteRunnerOptions) { - o.SkipLists = true - } -} - -func WithTestSourceSkipTimestamps() func(o *PluginTestSuiteRunnerOptions) { - return func(o *PluginTestSuiteRunnerOptions) { - o.SkipTimestamps = true - } -} - -func WithTestSourceSkipDates() func(o *PluginTestSuiteRunnerOptions) { - return func(o *PluginTestSuiteRunnerOptions) { - o.SkipDates = true - } -} - -func WithTestSourceSkipMaps() func(o *PluginTestSuiteRunnerOptions) { - return func(o *PluginTestSuiteRunnerOptions) { - o.SkipMaps = true - } -} - -func WithTestSourceSkipStructs() func(o *PluginTestSuiteRunnerOptions) { - return func(o *PluginTestSuiteRunnerOptions) { - o.SkipStructs = true - } -} - -func WithTestSourceSkipIntervals() func(o *PluginTestSuiteRunnerOptions) { - return func(o *PluginTestSuiteRunnerOptions) { - o.SkipIntervals = true - } -} - -func WithTestSourceSkipDurations() func(o *PluginTestSuiteRunnerOptions) { - return func(o *PluginTestSuiteRunnerOptions) { - o.SkipDurations = true +func WithTestSourceAllowNull(allowNull func(arrow.DataType) bool) func(o *PluginTestSuite) { + return func(o *PluginTestSuite) { + o.allowNull = allowNull } } -func WithTestSourceSkipTimes() func(o *PluginTestSuiteRunnerOptions) { - return func(o *PluginTestSuiteRunnerOptions) { - o.SkipTimes = true +func WithTestIgnoreNullsInLists() func(o *PluginTestSuite) { + return func(o *PluginTestSuite) { + o.ignoreNullsInLists = true } } -func WithTestSourceSkipLargeTypes() func(o *PluginTestSuiteRunnerOptions) { - return func(o *PluginTestSuiteRunnerOptions) { - o.SkipLargeTypes = true +func WithTestDataOptions(opts schema.TestSourceOptions) func(o *PluginTestSuite) { + return func(o *PluginTestSuite) { + o.genDatOptions = opts } } -func WithTestSourceSkipDecimals() func(o *PluginTestSuiteRunnerOptions) { - return func(o *PluginTestSuiteRunnerOptions) { - o.SkipDecimals = true - } -} - -func PluginTestSuiteRunner(t *testing.T, newPlugin NewPluginFunc, spec any, tests PluginTestSuiteTests, testOptions ...func(o *PluginTestSuiteRunnerOptions)) { +func PluginTestSuiteRunner(t *testing.T, p *Plugin, tests PluginTestSuiteTests, opts ...func(o *PluginTestSuite)) { t.Helper() suite := &PluginTestSuite{ - tests: tests, + tests: tests, + plugin: p, } - opts := PluginTestSuiteRunnerOptions{ - TestSourceOptions: schema.TestSourceOptions{ - TimePrecision: time.Microsecond, - }, - } - for _, o := range testOptions { - o(&opts) + for _, opt := range opts { + opt(suite) } ctx := context.Background() - logger := getTestLogger(t) - t.Run("TestWriteOverwrite", func(t *testing.T) { + t.Run("TestUpsert", func(t *testing.T) { t.Helper() - if suite.tests.SkipOverwrite { + if suite.tests.SkipUpsert { t.Skip("skipping " + t.Name()) } - p := newPlugin() - if err := suite.destinationPluginTestWriteOverwrite(ctx, p, logger, spec, opts); err != nil { - t.Fatal(err) - } - if err := p.Close(ctx); err != nil { + if err := suite.testUpsert(ctx); err != nil { t.Fatal(err) } }) - t.Run("TestWriteOverwriteDeleteStale", func(t *testing.T) { + t.Run("TestInsert", func(t *testing.T) { t.Helper() - if suite.tests.SkipOverwrite || suite.tests.SkipDeleteStale { + if suite.tests.SkipInsert { t.Skip("skipping " + t.Name()) } - p := newPlugin() - if err := suite.destinationPluginTestWriteOverwriteDeleteStale(ctx, p, logger, spec, opts); err != nil { - t.Fatal(err) - } - if err := p.Close(ctx); err != nil { + if err := suite.testInsert(ctx); err != nil { t.Fatal(err) } }) - t.Run("TestMigrateOverwrite", func(t *testing.T) { + t.Run("TestDelete", func(t *testing.T) { t.Helper() - if suite.tests.SkipMigrateOverwrite { + if suite.tests.SkipDelete { t.Skip("skipping " + t.Name()) } - migrateMode := MigrateModeSafe - writeMode := WriteModeOverwrite - suite.destinationPluginTestMigrate(ctx, t, newPlugin, logger, migrateMode, writeMode, tests.MigrateStrategyOverwrite, opts) - }) - - t.Run("TestMigrateOverwriteForce", func(t *testing.T) { - t.Helper() - if suite.tests.SkipMigrateOverwriteForce { - t.Skip("skipping " + t.Name()) - } - migrateMode := MigrateModeForce - writeMode := WriteModeOverwrite - suite.destinationPluginTestMigrate(ctx, t, newPlugin, logger, migrateMode, writeMode, tests.MigrateStrategyOverwrite, opts) - }) - - t.Run("TestWriteAppend", func(t *testing.T) { - t.Helper() - if suite.tests.SkipAppend { - t.Skip("skipping " + t.Name()) - } - migrateMode := MigrateModeSafe - writeMode := WriteModeOverwrite - p := newPlugin() - if err := suite.destinationPluginTestWriteAppend(ctx, p, logger, migrateMode, writeMode, opts); err != nil { - t.Fatal(err) - } - if err := p.Close(ctx); err != nil { + if err := suite.testDelete(ctx); err != nil { t.Fatal(err) } }) - t.Run("TestMigrateAppend", func(t *testing.T) { + t.Run("TestMigrate", func(t *testing.T) { t.Helper() - if suite.tests.SkipMigrateAppend { + if suite.tests.SkipMigrate { t.Skip("skipping " + t.Name()) } migrateMode := MigrateModeSafe - writeMode := WriteModeAppend - suite.destinationPluginTestMigrate(ctx, t, newPlugin, logger, migrateMode, writeMode, tests.MigrateStrategyAppend, opts) + writeMode := WriteModeOverwrite + suite.destinationPluginTestMigrate(ctx, t, p, migrateMode, writeMode, tests.MigrateStrategyOverwrite, opts) }) - t.Run("TestMigrateAppendForce", func(t *testing.T) { - t.Helper() - if suite.tests.SkipMigrateAppendForce { - t.Skip("skipping " + t.Name()) - } - migrateMode := MigrateModeForce - writeMode := WriteModeAppend - suite.destinationPluginTestMigrate(ctx, t, newPlugin, logger, migrateMode, writeMode, tests.MigrateStrategyAppend, opts) - }) } func sortRecordsBySyncTime(table *schema.Table, records []arrow.Record) { diff --git a/plugin/testing_write_append.go b/plugin/testing_write_append.go deleted file mode 100644 index d4ccdd15d4..0000000000 --- a/plugin/testing_write_append.go +++ /dev/null @@ -1,95 +0,0 @@ -package plugin - -import ( - "context" - "fmt" - "time" - - "github.com/apache/arrow/go/v13/arrow/array" - "github.com/cloudquery/plugin-sdk/v4/schema" - "github.com/google/uuid" - "github.com/rs/zerolog" -) - -func (s *PluginTestSuite) destinationPluginTestWriteAppend(ctx context.Context, p *Plugin, logger zerolog.Logger, migrateMode MigrateMode, writeMode WriteMode, testOpts PluginTestSuiteRunnerOptions) error { - if err := p.Init(ctx, nil); err != nil { - return fmt.Errorf("failed to init plugin: %w", err) - } - tableName := fmt.Sprintf("cq_write_append_%d", time.Now().Unix()) - table := schema.TestTable(tableName, testOpts.TestSourceOptions) - syncTime := time.Now().UTC().Round(1 * time.Second) - tables := schema.Tables{ - table, - } - if err := p.Migrate(ctx, tables, migrateMode); err != nil { - return fmt.Errorf("failed to migrate tables: %w", err) - } - - sourceName := "testAppendSource" + uuid.NewString() - - opts := schema.GenTestDataOptions{ - SourceName: sourceName, - SyncTime: syncTime, - MaxRows: 2, - TimePrecision: testOpts.TimePrecision, - } - record1 := schema.GenTestData(table, opts) - if err := p.writeAll(ctx, sourceName, syncTime, writeMode, record1); err != nil { - return fmt.Errorf("failed to write record first time: %w", err) - } - - secondSyncTime := syncTime.Add(10 * time.Second).UTC() - opts.SyncTime = secondSyncTime - opts.MaxRows = 1 - record2 := schema.GenTestData(table, opts) - - if !s.tests.SkipSecondAppend { - // write second time - if err := p.writeAll(ctx, sourceName, secondSyncTime, writeMode, record2); err != nil { - return fmt.Errorf("failed to write one second time: %w", err) - } - } - - resourcesRead, err := p.syncAll(ctx, SyncOptions{ - Tables: []string{tableName}, - SyncTime: secondSyncTime, - SourceName: sourceName, - }) - if err != nil { - return fmt.Errorf("failed to read all second time: %w", err) - } - sortRecordsBySyncTime(table, resourcesRead) - - expectedResource := 3 - if s.tests.SkipSecondAppend { - expectedResource = 2 - } - - if len(resourcesRead) != expectedResource { - return fmt.Errorf("expected %d resources, got %d", expectedResource, len(resourcesRead)) - } - - testOpts.AllowNull.replaceNullsByEmpty(record1) - testOpts.AllowNull.replaceNullsByEmpty(record2) - if testOpts.IgnoreNullsInLists { - stripNullsFromLists(record1) - stripNullsFromLists(record2) - } - if !recordApproxEqual(record1[0], resourcesRead[0]) { - diff := RecordDiff(record1[0], resourcesRead[0]) - return fmt.Errorf("first expected resource diff at row 0: %s", diff) - } - if !recordApproxEqual(record1[1], resourcesRead[1]) { - diff := RecordDiff(record1[1], resourcesRead[1]) - return fmt.Errorf("first expected resource diff at row 1: %s", diff) - } - - if !s.tests.SkipSecondAppend { - if !recordApproxEqual(record2[0], resourcesRead[2]) { - diff := RecordDiff(record2[0], resourcesRead[2]) - return fmt.Errorf("second expected resource diff: %s", diff) - } - } - - return nil -} diff --git a/plugin/testing_write_delete.go b/plugin/testing_write_delete.go new file mode 100644 index 0000000000..bb4c44c2d8 --- /dev/null +++ b/plugin/testing_write_delete.go @@ -0,0 +1,84 @@ +package plugin + +import ( + "context" + "fmt" + "time" + + "github.com/apache/arrow/go/v13/arrow" + "github.com/apache/arrow/go/v13/arrow/array" + "github.com/apache/arrow/go/v13/arrow/memory" + "github.com/cloudquery/plugin-sdk/v4/schema" + // "github.com/cloudquery/plugin-sdk/v4/types" +) + +func (s *PluginTestSuite) testDelete(ctx context.Context) error { + tableName := fmt.Sprintf("cq_delete_%d", time.Now().Unix()) + syncTime := time.Now().UTC().Round(1 * time.Second) + table := &schema.Table{ + Name: tableName, + Columns: []schema.Column{ + {Name: "name", Type: arrow.BinaryTypes.String}, + {Name: "sync_time", Type: arrow.FixedWidthTypes.Timestamp_us}, + }, + } + if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageCreateTable{ + Table: table, + }); err != nil { + return fmt.Errorf("failed to create table: %w", err) + } + + bldr := array.NewRecordBuilder(memory.DefaultAllocator, table.ToArrowSchema()) + bldr.Field(0).(*array.StringBuilder).Append("test") + bldr.Field(1).(*array.TimestampBuilder).AppendTime(syncTime) + bldr.Field(0).(*array.StringBuilder).Append("test") + bldr.Field(1).(*array.TimestampBuilder).AppendTime(syncTime.Add(time.Second)) + + if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageInsert{ + Record: bldr.NewRecord(), + }); err != nil { + return fmt.Errorf("failed to insert record: %w", err) + } + + messages, err := s.plugin.syncAll(ctx, SyncOptions{ + Tables: []string{tableName}, + }) + if err != nil { + return fmt.Errorf("failed to sync: %w", err) + } + totalItems := messages.InsertItems() + + if totalItems != 2 { + return fmt.Errorf("expected 2 items, got %d", totalItems) + } + + bldr = array.NewRecordBuilder(memory.DefaultAllocator, table.ToArrowSchema()) + bldr.Field(0).(*array.StringBuilder).Append("test") + bldr.Field(1).(*array.TimestampBuilder).AppendTime(syncTime.Add(time.Second)) + + if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageDelete{ + Record: bldr.NewRecord(), + WhereClauses: []WhereClause{ + { + Column: "name", + Operator: OperatorLessThan, + }, + }, + }); err != nil { + return fmt.Errorf("failed to delete stale records: %w", err) + } + + messages, err = s.plugin.syncAll(ctx, SyncOptions{ + Tables: []string{tableName}, + }) + if err != nil { + return fmt.Errorf("failed to sync: %w", err) + } + totalItems = messages.InsertItems() + + if totalItems != 1 { + return fmt.Errorf("expected 1 item, got %d", totalItems) + } + + return nil +} diff --git a/plugin/testing_write_insert.go b/plugin/testing_write_insert.go new file mode 100644 index 0000000000..4bc7f66c86 --- /dev/null +++ b/plugin/testing_write_insert.go @@ -0,0 +1,68 @@ +package plugin + +import ( + "context" + "fmt" + "time" + + "github.com/apache/arrow/go/v13/arrow" + "github.com/apache/arrow/go/v13/arrow/array" + "github.com/apache/arrow/go/v13/arrow/memory" + "github.com/cloudquery/plugin-sdk/v4/schema" +) + +func (s *PluginTestSuite) testInsert(ctx context.Context) error { + tableName := fmt.Sprintf("cq_test_insert_%d", time.Now().Unix()) + table := &schema.Table{ + Name: tableName, + Columns: []schema.Column{ + {Name: "name", Type: arrow.BinaryTypes.String}, + }, + } + if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageCreateTable{ + Table: table, + }); err != nil { + return fmt.Errorf("failed to create table: %w", err) + } + + bldr := array.NewRecordBuilder(memory.DefaultAllocator, table.ToArrowSchema()) + bldr.Field(0).(*array.StringBuilder).Append("foo") + + if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageInsert{ + Record: bldr.NewRecord(), + Upsert: true, + }); err != nil { + return fmt.Errorf("failed to insert record: %w", err) + } + + messages, err := s.plugin.syncAll(ctx, SyncOptions{ + Tables: []string{tableName}, + }) + if err != nil { + return fmt.Errorf("failed to sync: %w", err) + } + totalItems := messages.InsertItems() + if totalItems != 1 { + return fmt.Errorf("expected 1 item, got %d", totalItems) + } + + if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageInsert{ + Record: bldr.NewRecord(), + }); err != nil { + return fmt.Errorf("failed to insert record: %w", err) + } + + messages, err = s.plugin.syncAll(ctx, SyncOptions{ + Tables: []string{tableName}, + }) + if err != nil { + return fmt.Errorf("failed to sync: %w", err) + } + + totalItems = messages.InsertItems() + if totalItems != 2 { + return fmt.Errorf("expected 2 item, got %d", totalItems) + } + + return nil +} diff --git a/plugin/testing_write_migrate.go b/plugin/testing_write_migrate.go index 978c5951a2..78468a817e 100644 --- a/plugin/testing_write_migrate.go +++ b/plugin/testing_write_migrate.go @@ -8,25 +8,20 @@ import ( "time" "github.com/apache/arrow/go/v13/arrow" - "github.com/apache/arrow/go/v13/arrow/array" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/cloudquery/plugin-sdk/v4/types" "github.com/google/uuid" - "github.com/rs/zerolog" - "github.com/stretchr/testify/require" ) func tableUUIDSuffix() string { return strings.ReplaceAll(uuid.NewString(), "-", "_") } -func testMigration(ctx context.Context, _ *testing.T, p *Plugin, logger zerolog.Logger, target *schema.Table, source *schema.Table, mode MigrateMode, writeMode WriteMode, testOpts PluginTestSuiteRunnerOptions) error { - if err := p.Init(ctx, nil); err != nil { - return fmt.Errorf("failed to init plugin: %w", err) - } - - if err := p.Migrate(ctx, schema.Tables{source}, mode); err != nil { - return fmt.Errorf("failed to migrate tables: %w", err) +func (s *PluginTestSuite) migrate(ctx context.Context, target *schema.Table, source *schema.Table, strategy MigrateMode, mode MigrateMode) error { + if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageCreateTable{ + Table: source, + }); err != nil { + return fmt.Errorf("failed to create table: %w", err) } sourceName := target.Name @@ -35,68 +30,69 @@ func testMigration(ctx context.Context, _ *testing.T, p *Plugin, logger zerolog. SourceName: sourceName, SyncTime: syncTime, MaxRows: 1, - TimePrecision: testOpts.TimePrecision, + TimePrecision: s.genDatOptions.TimePrecision, } + resource1 := schema.GenTestData(source, opts)[0] - if err := p.writeOne(ctx, sourceName, syncTime, writeMode, resource1); err != nil { - return fmt.Errorf("failed to write one: %w", err) + + if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageInsert{ + Record: resource1, + }); err != nil { + return fmt.Errorf("failed to insert record: %w", err) } - if err := p.Migrate(ctx, schema.Tables{target}, mode); err != nil { - return fmt.Errorf("failed to migrate existing table: %w", err) + messages, err := s.plugin.syncAll(ctx, SyncOptions{ + Tables: []string{source.Name}, + }) + if err != nil { + return fmt.Errorf("failed to sync: %w", err) } - opts.SyncTime = syncTime.Add(time.Second).UTC() - resource2 := schema.GenTestData(target, opts) - if err := p.writeAll(ctx, sourceName, syncTime, writeMode, resource2); err != nil { - return fmt.Errorf("failed to write one after migration: %w", err) + totalItems := messages.InsertItems() + if totalItems != 1 { + return fmt.Errorf("expected 1 item, got %d", totalItems) } - testOpts.AllowNull.replaceNullsByEmpty(resource2) - if testOpts.IgnoreNullsInLists { - stripNullsFromLists(resource2) + if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageCreateTable{ + Table: target, + Force: strategy == MigrateModeForce, + }); err != nil { + return fmt.Errorf("failed to create table: %w", err) } - resourcesRead, err := p.syncAll(ctx, SyncOptions{ - Tables: []string{target.Name}, - SourceName: sourceName, + if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageInsert{ + Record: resource1, + }); err != nil { + return fmt.Errorf("failed to insert record: %w", err) + } + + messages, err = s.plugin.syncAll(ctx, SyncOptions{ + Tables: []string{source.Name}, }) if err != nil { - return fmt.Errorf("failed to read all: %w", err) + return fmt.Errorf("failed to sync: %w", err) } - sortRecordsBySyncTime(target, resourcesRead) - if mode == MigrateModeSafe { - if len(resourcesRead) != 2 { - return fmt.Errorf("expected 2 resources after write, got %d", len(resourcesRead)) - } - if !recordApproxEqual(resourcesRead[1], resource2[0]) { - diff := RecordDiff(resourcesRead[1], resource2[0]) - return fmt.Errorf("resource1 and resource2 are not equal. diff: %s", diff) + if strategy == MigrateModeSafe || mode == MigrateModeSafe { + totalItems = messages.InsertItems() + if totalItems != 2 { + return fmt.Errorf("expected 2 item, got %d", totalItems) } } else { - if len(resourcesRead) != 1 { - return fmt.Errorf("expected 1 resource after write, got %d", len(resourcesRead)) - } - if !recordApproxEqual(resourcesRead[0], resource2[0]) { - diff := RecordDiff(resourcesRead[0], resource2[0]) - return fmt.Errorf("resource1 and resource2 are not equal. diff: %s", diff) + totalItems = messages.InsertItems() + if totalItems != 1 { + return fmt.Errorf("expected 1 item, got %d", totalItems) } } return nil } -func (*PluginTestSuite) destinationPluginTestMigrate( +func (s *PluginTestSuite) testMigrate( ctx context.Context, t *testing.T, - newPlugin NewPluginFunc, - logger zerolog.Logger, - migrateMode MigrateMode, - writeMode WriteMode, - strategy MigrateStrategy, - testOpts PluginTestSuiteRunnerOptions, + mode MigrateMode, ) { t.Run("add_column", func(t *testing.T) { - if strategy.AddColumn == MigrateModeForce && migrateMode == MigrateModeSafe { + if s.tests.MigrateStrategy.AddColumn == MigrateModeForce && mode == MigrateModeSafe { t.Skip("skipping as migrate mode is safe") return } @@ -104,9 +100,6 @@ func (*PluginTestSuite) destinationPluginTestMigrate( source := &schema.Table{ Name: tableName, Columns: schema.ColumnList{ - schema.CqSourceNameColumn, - schema.CqSyncTimeColumn, - schema.CqIDColumn, {Name: "id", Type: types.ExtensionTypes.UUID}, }, } @@ -114,25 +107,17 @@ func (*PluginTestSuite) destinationPluginTestMigrate( target := &schema.Table{ Name: tableName, Columns: schema.ColumnList{ - schema.CqSourceNameColumn, - schema.CqSyncTimeColumn, - schema.CqIDColumn, {Name: "id", Type: types.ExtensionTypes.UUID}, {Name: "bool", Type: arrow.FixedWidthTypes.Boolean}, }, } - - p := newPlugin() - if err := testMigration(ctx, t, p, logger, target, source, strategy.AddColumn, writeMode, testOpts); err != nil { + if err := s.migrate(ctx, target, source, s.tests.MigrateStrategy.AddColumn, mode); err != nil { t.Fatalf("failed to migrate %s: %v", tableName, err) } - if err := p.Close(ctx); err != nil { - t.Fatal(err) - } }) t.Run("add_column_not_null", func(t *testing.T) { - if strategy.AddColumnNotNull == MigrateModeForce && migrateMode == MigrateModeSafe { + if s.tests.MigrateStrategy.AddColumnNotNull == MigrateModeForce && mode == MigrateModeSafe { t.Skip("skipping as migrate mode is safe") return } @@ -140,9 +125,6 @@ func (*PluginTestSuite) destinationPluginTestMigrate( source := &schema.Table{ Name: tableName, Columns: schema.ColumnList{ - schema.CqSourceNameColumn, - schema.CqSyncTimeColumn, - schema.CqIDColumn, {Name: "id", Type: types.ExtensionTypes.UUID}, }, } @@ -150,23 +132,16 @@ func (*PluginTestSuite) destinationPluginTestMigrate( target := &schema.Table{ Name: tableName, Columns: schema.ColumnList{ - schema.CqSourceNameColumn, - schema.CqSyncTimeColumn, - schema.CqIDColumn, {Name: "id", Type: types.ExtensionTypes.UUID}, {Name: "bool", Type: arrow.FixedWidthTypes.Boolean, NotNull: true}, }} - p := newPlugin() - if err := testMigration(ctx, t, p, logger, target, source, strategy.AddColumnNotNull, writeMode, testOpts); err != nil { + if err := s.migrate(ctx, target, source, s.tests.MigrateStrategy.AddColumnNotNull, mode); err != nil { t.Fatalf("failed to migrate add_column_not_null: %v", err) } - if err := p.Close(ctx); err != nil { - t.Fatal(err) - } }) t.Run("remove_column", func(t *testing.T) { - if strategy.RemoveColumn == MigrateModeForce && migrateMode == MigrateModeSafe { + if s.tests.MigrateStrategy.RemoveColumn == MigrateModeForce && mode == MigrateModeSafe { t.Skip("skipping as migrate mode is safe") return } @@ -174,32 +149,21 @@ func (*PluginTestSuite) destinationPluginTestMigrate( source := &schema.Table{ Name: tableName, Columns: schema.ColumnList{ - schema.CqSourceNameColumn, - schema.CqSyncTimeColumn, - schema.CqIDColumn, {Name: "id", Type: types.ExtensionTypes.UUID}, {Name: "bool", Type: arrow.FixedWidthTypes.Boolean}, }} target := &schema.Table{ Name: tableName, Columns: schema.ColumnList{ - schema.CqSourceNameColumn, - schema.CqSyncTimeColumn, - schema.CqIDColumn, {Name: "id", Type: types.ExtensionTypes.UUID}, }} - - p := newPlugin() - if err := testMigration(ctx, t, p, logger, target, source, strategy.RemoveColumn, writeMode, testOpts); err != nil { + if err := s.migrate(ctx, target, source, s.tests.MigrateStrategy.RemoveColumn, mode); err != nil { t.Fatalf("failed to migrate remove_column: %v", err) } - if err := p.Close(ctx); err != nil { - t.Fatal(err) - } }) t.Run("remove_column_not_null", func(t *testing.T) { - if strategy.RemoveColumnNotNull == MigrateModeForce && migrateMode == MigrateModeSafe { + if s.tests.MigrateStrategy.RemoveColumnNotNull == MigrateModeForce && mode == MigrateModeSafe { t.Skip("skipping as migrate mode is safe") return } @@ -207,9 +171,6 @@ func (*PluginTestSuite) destinationPluginTestMigrate( source := &schema.Table{ Name: tableName, Columns: schema.ColumnList{ - schema.CqSourceNameColumn, - schema.CqSyncTimeColumn, - schema.CqIDColumn, {Name: "id", Type: types.ExtensionTypes.UUID}, {Name: "bool", Type: arrow.FixedWidthTypes.Boolean, NotNull: true}, }, @@ -217,23 +178,15 @@ func (*PluginTestSuite) destinationPluginTestMigrate( target := &schema.Table{ Name: tableName, Columns: schema.ColumnList{ - schema.CqSourceNameColumn, - schema.CqSyncTimeColumn, - schema.CqIDColumn, {Name: "id", Type: types.ExtensionTypes.UUID}, }} - - p := newPlugin() - if err := testMigration(ctx, t, p, logger, target, source, strategy.RemoveColumnNotNull, writeMode, testOpts); err != nil { + if err := s.migrate(ctx, target, source, s.tests.MigrateStrategy.RemoveColumnNotNull, mode); err != nil { t.Fatalf("failed to migrate remove_column_not_null: %v", err) } - if err := p.Close(ctx); err != nil { - t.Fatal(err) - } }) t.Run("change_column", func(t *testing.T) { - if strategy.ChangeColumn == MigrateModeForce && migrateMode == MigrateModeSafe { + if s.tests.MigrateStrategy.ChangeColumn == MigrateModeForce && mode == MigrateModeSafe { t.Skip("skipping as migrate mode is safe") return } @@ -241,40 +194,24 @@ func (*PluginTestSuite) destinationPluginTestMigrate( source := &schema.Table{ Name: tableName, Columns: schema.ColumnList{ - schema.CqSourceNameColumn, - schema.CqSyncTimeColumn, - schema.CqIDColumn, {Name: "id", Type: types.ExtensionTypes.UUID}, {Name: "bool", Type: arrow.FixedWidthTypes.Boolean, NotNull: true}, }} target := &schema.Table{ Name: tableName, Columns: schema.ColumnList{ - schema.CqSourceNameColumn, - schema.CqSyncTimeColumn, - schema.CqIDColumn, {Name: "id", Type: types.ExtensionTypes.UUID}, {Name: "bool", Type: arrow.BinaryTypes.String, NotNull: true}, }} - - p := newPlugin() - if err := testMigration(ctx, t, p, logger, target, source, strategy.ChangeColumn, writeMode, testOpts); err != nil { + if err := s.migrate(ctx, target, source, s.tests.MigrateStrategy.ChangeColumn, mode); err != nil { t.Fatalf("failed to migrate change_column: %v", err) } - if err := p.Close(ctx); err != nil { - t.Fatal(err) - } }) t.Run("double_migration", func(t *testing.T) { - tableName := "double_migration_" + tableUUIDSuffix() - table := schema.TestTable(tableName, testOpts.TestSourceOptions) - - p := newPlugin() - require.NoError(t, p.Init(ctx, nil)) - require.NoError(t, p.Migrate(ctx, schema.Tables{table}, MigrateModeSafe)) - - require.NoError(t, p.Init(ctx, MigrateModeSafe)) - require.NoError(t, p.Migrate(ctx, schema.Tables{table}, MigrateModeSafe)) + // tableName := "double_migration_" + tableUUIDSuffix() + // table := schema.TestTable(tableName, testOpts.TestSourceOptions) + // require.NoError(t, p.Migrate(ctx, schema.Tables{table}, MigrateOptions{MigrateMode: MigrateModeForce})) + // require.NoError(t, p.Migrate(ctx, schema.Tables{table}, MigrateOptions{MigrateMode: MigrateModeForce})) }) } diff --git a/plugin/testing_write_overwrite.go b/plugin/testing_write_overwrite.go deleted file mode 100644 index fd851a6e2e..0000000000 --- a/plugin/testing_write_overwrite.go +++ /dev/null @@ -1,115 +0,0 @@ -package plugin - -import ( - "context" - "fmt" - "time" - - "github.com/apache/arrow/go/v13/arrow/array" - "github.com/cloudquery/plugin-sdk/v4/schema" - "github.com/cloudquery/plugin-sdk/v4/types" - "github.com/google/uuid" - "github.com/rs/zerolog" -) - -func (*PluginTestSuite) destinationPluginTestWriteOverwrite(ctx context.Context, p *Plugin, logger zerolog.Logger, spec any, testOpts PluginTestSuiteRunnerOptions) error { - if err := p.Init(ctx, spec); err != nil { - return fmt.Errorf("failed to init plugin: %w", err) - } - tableName := fmt.Sprintf("cq_test_write_overwrite_%d", time.Now().Unix()) - table := schema.TestTable(tableName, testOpts.TestSourceOptions) - syncTime := time.Now().UTC().Round(1 * time.Second) - tables := schema.Tables{ - table, - } - if err := p.Migrate(ctx, tables, MigrateModeSafe); err != nil { - return fmt.Errorf("failed to migrate tables: %w", err) - } - - sourceName := "testOverwriteSource" + uuid.NewString() - - opts := schema.GenTestDataOptions{ - SourceName: sourceName, - SyncTime: syncTime, - MaxRows: 2, - TimePrecision: testOpts.TimePrecision, - } - resources := schema.GenTestData(table, opts) - if err := p.writeAll(ctx, sourceName, syncTime, WriteModeOverwrite, resources); err != nil { - return fmt.Errorf("failed to write all: %w", err) - } - sortRecordsBySyncTime(table, resources) - testOpts.AllowNull.replaceNullsByEmpty(resources) - if testOpts.IgnoreNullsInLists { - stripNullsFromLists(resources) - } - resourcesRead, err := p.syncAll(ctx, SyncOptions{ - Tables: []string{tableName}, - SyncTime: syncTime, - SourceName: sourceName, - }) - if err != nil { - return fmt.Errorf("failed to read all: %w", err) - } - sortRecordsBySyncTime(table, resourcesRead) - - if len(resourcesRead) != 2 { - return fmt.Errorf("expected 2 resources, got %d", len(resourcesRead)) - } - - if !recordApproxEqual(resources[0], resourcesRead[0]) { - diff := RecordDiff(resources[0], resourcesRead[0]) - return fmt.Errorf("expected first resource to be equal. diff=%s", diff) - } - - if !recordApproxEqual(resources[1], resourcesRead[1]) { - diff := RecordDiff(resources[1], resourcesRead[1]) - return fmt.Errorf("expected second resource to be equal. diff=%s", diff) - } - - secondSyncTime := syncTime.Add(time.Second).UTC() - - // copy first resource but update the sync time - cqIDInds := resources[0].Schema().FieldIndices(schema.CqIDColumn.Name) - u := resources[0].Column(cqIDInds[0]).(*types.UUIDArray).Value(0) - opts = schema.GenTestDataOptions{ - SourceName: sourceName, - SyncTime: secondSyncTime, - MaxRows: 1, - StableUUID: u, - TimePrecision: testOpts.TimePrecision, - } - updatedResource := schema.GenTestData(table, opts) - // write second time - if err := p.writeAll(ctx, sourceName, secondSyncTime, WriteModeOverwrite, updatedResource); err != nil { - return fmt.Errorf("failed to write one second time: %w", err) - } - - testOpts.AllowNull.replaceNullsByEmpty(updatedResource) - if testOpts.IgnoreNullsInLists { - stripNullsFromLists(updatedResource) - } - resourcesRead, err = p.syncAll(ctx, SyncOptions{ - Tables: []string{tableName}, - SyncTime: secondSyncTime, - SourceName: sourceName, - }) - if err != nil { - return fmt.Errorf("failed to read all second time: %w", err) - } - sortRecordsBySyncTime(table, resourcesRead) - if len(resourcesRead) != 2 { - return fmt.Errorf("after overwrite expected 2 resources, got %d", len(resourcesRead)) - } - - if !recordApproxEqual(resources[1], resourcesRead[0]) { - diff := RecordDiff(resources[1], resourcesRead[0]) - return fmt.Errorf("after overwrite expected first resource to be equal. diff=%s", diff) - } - if !recordApproxEqual(updatedResource[0], resourcesRead[1]) { - diff := RecordDiff(updatedResource[0], resourcesRead[1]) - return fmt.Errorf("after overwrite expected second resource to be equal. diff=%s", diff) - } - - return nil -} diff --git a/plugin/testing_write_upsert.go b/plugin/testing_write_upsert.go new file mode 100644 index 0000000000..4ee1ba9db7 --- /dev/null +++ b/plugin/testing_write_upsert.go @@ -0,0 +1,69 @@ +package plugin + +import ( + "context" + "fmt" + "time" + + "github.com/apache/arrow/go/v13/arrow" + "github.com/apache/arrow/go/v13/arrow/array" + "github.com/apache/arrow/go/v13/arrow/memory" + "github.com/cloudquery/plugin-sdk/v4/schema" +) + +func (s *PluginTestSuite) destinationPluginTestWriteOverwrite(ctx context.Context, p *Plugin) error { + tableName := fmt.Sprintf("cq_test_upsert_%d", time.Now().Unix()) + table := &schema.Table{ + Name: tableName, + Columns: []schema.Column{ + {Name: "name", Type: arrow.BinaryTypes.String, PrimaryKey: true}, + }, + } + if err := p.writeOne(ctx, WriteOptions{}, &MessageCreateTable{ + Table: table, + }); err != nil { + return fmt.Errorf("failed to create table: %w", err) + } + + bldr := array.NewRecordBuilder(memory.DefaultAllocator, table.ToArrowSchema()) + bldr.Field(0).(*array.StringBuilder).Append("foo") + + if err := p.writeOne(ctx, WriteOptions{}, &MessageInsert{ + Record: bldr.NewRecord(), + Upsert: true, + }); err != nil { + return fmt.Errorf("failed to insert record: %w", err) + } + + messages, err := p.syncAll(ctx, SyncOptions{ + Tables: []string{tableName}, + }) + if err != nil { + return fmt.Errorf("failed to sync: %w", err) + } + totalItems := messages.InsertItems() + if totalItems != 1 { + return fmt.Errorf("expected 1 item, got %d", totalItems) + } + + if err := p.writeOne(ctx, WriteOptions{}, &MessageInsert{ + Record: bldr.NewRecord(), + Upsert: true, + }); err != nil { + return fmt.Errorf("failed to insert record: %w", err) + } + + messages, err = p.syncAll(ctx, SyncOptions{ + Tables: []string{tableName}, + }) + if err != nil { + return fmt.Errorf("failed to sync: %w", err) + } + + totalItems = messages.InsertItems() + if totalItems != 1 { + return fmt.Errorf("expected 1 item, got %d", totalItems) + } + + return nil +} diff --git a/scheduler/benchmark_test.go b/scheduler/benchmark_test.go new file mode 100644 index 0000000000..6990da0fd7 --- /dev/null +++ b/scheduler/benchmark_test.go @@ -0,0 +1 @@ +package scheduler diff --git a/scheduler/metrics.go b/scheduler/metrics.go index 372965ba93..f5b6c73ef6 100644 --- a/scheduler/metrics.go +++ b/scheduler/metrics.go @@ -7,6 +7,7 @@ import ( "github.com/cloudquery/plugin-sdk/v4/schema" ) +// Metrics is deprecated as we move toward open telemetry for tracing and metrics type Metrics struct { TableClient map[string]map[string]*TableClientMetrics } diff --git a/scheduler/plugin_managed_source_test.go.backup b/scheduler/plugin_managed_source_test.go.backup deleted file mode 100644 index e0a006a4ca..0000000000 --- a/scheduler/plugin_managed_source_test.go.backup +++ /dev/null @@ -1,484 +0,0 @@ -package scheduler - -import ( - "context" - "fmt" - "testing" - "time" - - "github.com/apache/arrow/go/v13/arrow" - "github.com/apache/arrow/go/v13/arrow/array" - pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" - "github.com/cloudquery/plugin-sdk/v4/scalar" - "github.com/cloudquery/plugin-sdk/v4/schema" - "github.com/google/go-cmp/cmp" - "github.com/google/uuid" - "github.com/rs/zerolog" -) - -type testExecutionClient struct { - UnimplementedWriter -} - -var _ schema.ClientMeta = &testExecutionClient{} - -var deterministicStableUUID = uuid.MustParse("c25355aab52c5b70a4e0c9991f5a3b87") -var randomStableUUID = uuid.MustParse("00000000000040008000000000000000") - -var testSyncTime = time.Now() - -func testResolverSuccess(_ context.Context, _ schema.ClientMeta, _ *schema.Resource, res chan<- any) error { - res <- map[string]any{ - "TestColumn": 3, - } - return nil -} - -func testResolverPanic(context.Context, schema.ClientMeta, *schema.Resource, chan<- any) error { - panic("Resolver") -} - -func testPreResourceResolverPanic(context.Context, schema.ClientMeta, *schema.Resource) error { - panic("PreResourceResolver") -} - -func testColumnResolverPanic(context.Context, schema.ClientMeta, *schema.Resource, schema.Column) error { - panic("ColumnResolver") -} - -func testTableSuccess() *schema.Table { - return &schema.Table{ - Name: "test_table_success", - Resolver: testResolverSuccess, - Columns: []schema.Column{ - { - Name: "test_column", - Type: arrow.PrimitiveTypes.Int64, - }, - }, - } -} - -func testTableSuccessWithPK() *schema.Table { - return &schema.Table{ - Name: "test_table_success", - Resolver: testResolverSuccess, - Columns: []schema.Column{ - { - Name: "test_column", - Type: arrow.PrimitiveTypes.Int64, - PrimaryKey: true, - }, - }, - } -} - -func testTableResolverPanic() *schema.Table { - return &schema.Table{ - Name: "test_table_resolver_panic", - Resolver: testResolverPanic, - Columns: []schema.Column{ - { - Name: "test_column", - Type: arrow.PrimitiveTypes.Int64, - }, - }, - } -} - -func testTablePreResourceResolverPanic() *schema.Table { - return &schema.Table{ - Name: "test_table_pre_resource_resolver_panic", - PreResourceResolver: testPreResourceResolverPanic, - Resolver: testResolverSuccess, - Columns: []schema.Column{ - { - Name: "test_column", - Type: arrow.PrimitiveTypes.Int64, - }, - }, - } -} - -func testTableColumnResolverPanic() *schema.Table { - return &schema.Table{ - Name: "test_table_column_resolver_panic", - Resolver: testResolverSuccess, - Columns: []schema.Column{ - { - Name: "test_column", - Type: arrow.PrimitiveTypes.Int64, - }, - { - Name: "test_column1", - Type: arrow.PrimitiveTypes.Int64, - Resolver: testColumnResolverPanic, - }, - }, - } -} - -func testTableRelationSuccess() *schema.Table { - return &schema.Table{ - Name: "test_table_relation_success", - Resolver: testResolverSuccess, - Columns: []schema.Column{ - { - Name: "test_column", - Type: arrow.PrimitiveTypes.Int64, - }, - }, - Relations: []*schema.Table{ - testTableSuccess(), - }, - } -} - -func (*testExecutionClient) ID() string { - return "testExecutionClient" -} - -func (*testExecutionClient) Close(context.Context) error { - return nil -} - -func (*testExecutionClient) Read(ctx context.Context, table *schema.Table, sourceName string, res chan<- arrow.Record) error { - return fmt.Errorf("not implemented") -} - -func (*testExecutionClient) Sync(ctx context.Context, res chan<- arrow.Record) error { - return fmt.Errorf("not implemented") -} - -func newTestExecutionClient(context.Context, zerolog.Logger, pbPlugin.Spec) (Client, error) { - return &testExecutionClient{}, nil -} - -type syncTestCase struct { - table *schema.Table - stats Metrics - data []scalar.Vector - deterministicCQID bool -} - -var syncTestCases = []syncTestCase{ - { - table: testTableSuccess(), - stats: Metrics{ - TableClient: map[string]map[string]*TableClientMetrics{ - "test_table_success": { - "testExecutionClient": { - Resources: 1, - }, - }, - }, - }, - data: []scalar.Vector{ - { - &scalar.String{Value: "testSource", Valid: true}, - &scalar.Timestamp{Value: testSyncTime, Valid: true}, - &scalar.UUID{Value: randomStableUUID, Valid: true}, - &scalar.UUID{}, - &scalar.Int{Value: 3, Valid: true}, - }, - }, - }, - { - table: testTableResolverPanic(), - stats: Metrics{ - TableClient: map[string]map[string]*TableClientMetrics{ - "test_table_resolver_panic": { - "testExecutionClient": { - Panics: 1, - }, - }, - }, - }, - data: nil, - }, - { - table: testTablePreResourceResolverPanic(), - stats: Metrics{ - TableClient: map[string]map[string]*TableClientMetrics{ - "test_table_pre_resource_resolver_panic": { - "testExecutionClient": { - Panics: 1, - }, - }, - }, - }, - data: nil, - }, - - { - table: testTableRelationSuccess(), - stats: Metrics{ - TableClient: map[string]map[string]*TableClientMetrics{ - "test_table_relation_success": { - "testExecutionClient": { - Resources: 1, - }, - }, - "test_table_success": { - "testExecutionClient": { - Resources: 1, - }, - }, - }, - }, - data: []scalar.Vector{ - { - &scalar.String{Value: "testSource", Valid: true}, - &scalar.Timestamp{Value: testSyncTime, Valid: true}, - &scalar.UUID{Value: randomStableUUID, Valid: true}, - &scalar.UUID{}, - &scalar.Int{Value: 3, Valid: true}, - }, - { - &scalar.String{Value: "testSource", Valid: true}, - &scalar.Timestamp{Value: testSyncTime, Valid: true}, - &scalar.UUID{Value: randomStableUUID, Valid: true}, - &scalar.UUID{Value: randomStableUUID, Valid: true}, - &scalar.Int{Value: 3, Valid: true}, - }, - }, - }, - { - table: testTableSuccess(), - stats: Metrics{ - TableClient: map[string]map[string]*TableClientMetrics{ - "test_table_success": { - "testExecutionClient": { - Resources: 1, - }, - }, - }, - }, - data: []scalar.Vector{ - { - &scalar.String{Value: "testSource", Valid: true}, - &scalar.Timestamp{Value: testSyncTime, Valid: true}, - &scalar.UUID{Value: randomStableUUID, Valid: true}, - &scalar.UUID{}, - &scalar.Int{Value: 3, Valid: true}, - }, - }, - deterministicCQID: true, - }, - { - table: testTableColumnResolverPanic(), - stats: Metrics{ - TableClient: map[string]map[string]*TableClientMetrics{ - "test_table_column_resolver_panic": { - "testExecutionClient": { - Panics: 1, - Resources: 1, - }, - }, - }, - }, - data: []scalar.Vector{ - { - &scalar.String{Value: "testSource", Valid: true}, - &scalar.Timestamp{Value: testSyncTime, Valid: true}, - &scalar.UUID{Value: randomStableUUID, Valid: true}, - &scalar.UUID{}, - &scalar.Int{Value: 3, Valid: true}, - &scalar.Int{}, - }, - }, - deterministicCQID: true, - }, - { - table: testTableRelationSuccess(), - stats: Metrics{ - TableClient: map[string]map[string]*TableClientMetrics{ - "test_table_relation_success": { - "testExecutionClient": { - Resources: 1, - }, - }, - "test_table_success": { - "testExecutionClient": { - Resources: 1, - }, - }, - }, - }, - data: []scalar.Vector{ - { - &scalar.String{Value: "testSource", Valid: true}, - &scalar.Timestamp{Value: testSyncTime, Valid: true}, - &scalar.UUID{Value: randomStableUUID, Valid: true}, - &scalar.UUID{}, - &scalar.Int{Value: 3, Valid: true}, - }, - { - &scalar.String{Value: "testSource", Valid: true}, - &scalar.Timestamp{Value: testSyncTime, Valid: true}, - &scalar.UUID{Value: randomStableUUID, Valid: true}, - &scalar.UUID{Value: randomStableUUID, Valid: true}, - &scalar.Int{Value: 3, Valid: true}, - }, - }, - deterministicCQID: true, - }, - { - table: testTableSuccessWithPK(), - stats: Metrics{ - TableClient: map[string]map[string]*TableClientMetrics{ - "test_table_success": { - "testExecutionClient": { - Resources: 1, - }, - }, - }, - }, - data: []scalar.Vector{ - { - &scalar.String{Value: "testSource", Valid: true}, - &scalar.Timestamp{Value: testSyncTime, Valid: true}, - &scalar.UUID{Value: deterministicStableUUID, Valid: true}, - &scalar.UUID{}, - &scalar.Int{Value: 3, Valid: true}, - }, - }, - deterministicCQID: true, - }, -} - -type testRand struct{} - -func (testRand) Read(p []byte) (n int, err error) { - for i := range p { - p[i] = byte(0) - } - return len(p), nil -} - -func TestManagedSync(t *testing.T) { - uuid.SetRand(testRand{}) - for _, scheduler := range AllSchedulers { - for _, tc := range syncTestCases { - tc := tc - tc.table = tc.table.Copy(nil) - t.Run(tc.table.Name+"_"+scheduler.String(), func(t *testing.T) { - testSyncTable(t, tc, scheduler, tc.deterministicCQID) - }) - } - } -} - -func testSyncTable(t *testing.T, tc syncTestCase, scheduler Scheduler, deterministicCQID bool) { - ctx := context.Background() - tables := []*schema.Table{ - tc.table, - } - - plugin := NewPlugin( - "testSourcePlugin", - "1.0.0", - newTestExecutionClient, - WithStaticTables(tables), - ) - plugin.SetLogger(zerolog.New(zerolog.NewTestWriter(t))) - sourceName := "testSource" - - if err := plugin.Init(ctx, nil); err != nil { - t.Fatal(err) - } - - records, err := plugin.syncAll(ctx, sourceName, testSyncTime, SyncOptions{ - Tables: []string{"*"}, - Concurrency: 1, - Scheduler: scheduler, - DeterministicCQID: deterministicCQID, - }) - if err != nil { - t.Fatal(err) - } - - var i int - for _, record := range records { - if tc.data == nil { - t.Fatalf("Unexpected resource %v", record) - } - if i >= len(tc.data) { - t.Fatalf("expected %d resources. got %d", len(tc.data), i) - } - rec := tc.data[i].ToArrowRecord(record.Schema()) - if !array.RecordEqual(rec, record) { - t.Fatal(RecordDiff(rec, record)) - // t.Fatalf("expected at i=%d: %v. got %v", i, tc.data[i], record) - } - i++ - } - if len(tc.data) != i { - t.Fatalf("expected %d resources. got %d", len(tc.data), i) - } - - stats := plugin.Metrics() - if !tc.stats.Equal(stats) { - t.Fatalf("unexpected stats: %v", cmp.Diff(tc.stats, stats)) - } -} - -// func TestIgnoredColumns(t *testing.T) { -// table := &schema.Table{ -// Columns: schema.ColumnList{ -// { -// Name: "a", -// Type: arrow.BinaryTypes.String, -// IgnoreInTests: true, -// }, -// }, -// } -// validateResources(t, table, schema.Resources{{ -// Item: struct{ A *string }{}, -// Table: &schema.Table{ -// Columns: schema.ColumnList{ -// { -// Name: "a", -// Type: arrow.BinaryTypes.String, -// IgnoreInTests: true, -// }, -// }, -// }, -// }}) -// } - -var testTable struct { - PrimaryKey string - SecondaryKey string - TertiaryKey string - Quaternary string -} - -// func TestNewPluginPrimaryKeys(t *testing.T) { -// testTransforms := []struct { -// transformerOptions []transformers.StructTransformerOption -// resultKeys []string -// }{ -// { -// transformerOptions: []transformers.StructTransformerOption{transformers.WithPrimaryKeys("PrimaryKey")}, -// resultKeys: []string{"primary_key"}, -// }, -// { -// transformerOptions: []transformers.StructTransformerOption{}, -// resultKeys: []string{"_cq_id"}, -// }, -// } -// for _, tc := range testTransforms { -// tables := []*schema.Table{ -// { -// Name: "test_table", -// Transform: transformers.TransformWithStruct( -// &testTable, tc.transformerOptions..., -// ), -// }, -// } - -// plugin := NewPlugin("testSourcePlugin", "1.0.0", tables, newTestExecutionClient) -// assert.Equal(t, tc.resultKeys, plugin.tables[0].PrimaryKeys()) -// } -// } diff --git a/scheduler/scheduler.go b/scheduler/scheduler.go index d9c2654634..08d8c86166 100644 --- a/scheduler/scheduler.go +++ b/scheduler/scheduler.go @@ -57,8 +57,6 @@ func (s SchedulerStrategy) String() string { return AllSchedulerNames[s] } -const periodicMetricLoggerInterval = 30 * time.Second - type Option func(*Scheduler) func WithLogger(logger zerolog.Logger) Option { @@ -79,6 +77,12 @@ func WithConcurrency(concurrency uint64) Option { } } +func WithSchedulerStrategy(strategy SchedulerStrategy) Option { + return func(s *Scheduler) { + s.strategy = strategy + } +} + type Scheduler struct { tables schema.Tables client schema.ClientMeta @@ -104,10 +108,14 @@ func NewScheduler(tables schema.Tables, client schema.ClientMeta, opts ...Option metrics: &Metrics{TableClient: make(map[string]map[string]*TableClientMetrics)}, caser: caser.New(), concurrency: defaultConcurrency, + maxDepth: maxDepth(tables), } for _, opt := range opts { opt(&s) } + if s.maxDepth > 3 { + panic(fmt.Errorf("max depth of %d is not supported for scheduler", s.maxDepth)) + } return &s } @@ -245,6 +253,20 @@ func (p *Scheduler) resolveColumn(ctx context.Context, logger zerolog.Logger, ta } } +func maxDepth(tables schema.Tables) uint64 { + var depth uint64 + if len(tables) == 0 { + return 0 + } + for _, table := range tables { + newDepth := 1 + maxDepth(table.Relations) + if newDepth > depth { + depth = newDepth + } + } + return depth +} + // unparam's suggestion to remove the second parameter is not good advice here. // nolint:unparam func max(a, b uint64) uint64 { diff --git a/scheduler/scheduler_round_robin_test.go b/scheduler/scheduler_round_robin_test.go index 3b746b81bf..5e60765063 100644 --- a/scheduler/scheduler_round_robin_test.go +++ b/scheduler/scheduler_round_robin_test.go @@ -6,13 +6,6 @@ import ( "github.com/cloudquery/plugin-sdk/v4/schema" ) -type testExecutionClient struct { -} - -func (t *testExecutionClient) ID() string { - return "test" -} - func TestRoundRobinInterleave(t *testing.T) { table1 := &schema.Table{Name: "test_table"} table2 := &schema.Table{Name: "test_table2"} diff --git a/scheduler/scheduler_test.go b/scheduler/scheduler_test.go new file mode 100644 index 0000000000..37bd9fea56 --- /dev/null +++ b/scheduler/scheduler_test.go @@ -0,0 +1,278 @@ +package scheduler + +import ( + "context" + "testing" + "time" + + "github.com/apache/arrow/go/v13/arrow" + "github.com/apache/arrow/go/v13/arrow/array" + "github.com/cloudquery/plugin-sdk/v4/scalar" + "github.com/cloudquery/plugin-sdk/v4/schema" + "github.com/google/uuid" + "github.com/rs/zerolog" +) + +type testExecutionClient struct { +} + +func (t *testExecutionClient) ID() string { + return "test" +} + +var _ schema.ClientMeta = &testExecutionClient{} + +var deterministicStableUUID = uuid.MustParse("c25355aab52c5b70a4e0c9991f5a3b87") +var randomStableUUID = uuid.MustParse("00000000000040008000000000000000") + +var testSyncTime = time.Now() + +func testResolverSuccess(_ context.Context, _ schema.ClientMeta, _ *schema.Resource, res chan<- any) error { + res <- map[string]any{ + "TestColumn": 3, + } + return nil +} + +func testResolverPanic(context.Context, schema.ClientMeta, *schema.Resource, chan<- any) error { + panic("Resolver") +} + +func testPreResourceResolverPanic(context.Context, schema.ClientMeta, *schema.Resource) error { + panic("PreResourceResolver") +} + +func testColumnResolverPanic(context.Context, schema.ClientMeta, *schema.Resource, schema.Column) error { + panic("ColumnResolver") +} + +func testTableSuccess() *schema.Table { + return &schema.Table{ + Name: "test_table_success", + Resolver: testResolverSuccess, + Columns: []schema.Column{ + { + Name: "test_column", + Type: arrow.PrimitiveTypes.Int64, + }, + }, + } +} + +func testTableSuccessWithPK() *schema.Table { + return &schema.Table{ + Name: "test_table_success", + Resolver: testResolverSuccess, + Columns: []schema.Column{ + { + Name: "test_column", + Type: arrow.PrimitiveTypes.Int64, + PrimaryKey: true, + }, + }, + } +} + +func testTableResolverPanic() *schema.Table { + return &schema.Table{ + Name: "test_table_resolver_panic", + Resolver: testResolverPanic, + Columns: []schema.Column{ + { + Name: "test_column", + Type: arrow.PrimitiveTypes.Int64, + }, + }, + } +} + +func testTablePreResourceResolverPanic() *schema.Table { + return &schema.Table{ + Name: "test_table_pre_resource_resolver_panic", + PreResourceResolver: testPreResourceResolverPanic, + Resolver: testResolverSuccess, + Columns: []schema.Column{ + { + Name: "test_column", + Type: arrow.PrimitiveTypes.Int64, + }, + }, + } +} + +func testTableColumnResolverPanic() *schema.Table { + return &schema.Table{ + Name: "test_table_column_resolver_panic", + Resolver: testResolverSuccess, + Columns: []schema.Column{ + { + Name: "test_column", + Type: arrow.PrimitiveTypes.Int64, + }, + { + Name: "test_column1", + Type: arrow.PrimitiveTypes.Int64, + Resolver: testColumnResolverPanic, + }, + }, + } +} + +func testTableRelationSuccess() *schema.Table { + return &schema.Table{ + Name: "test_table_relation_success", + Resolver: testResolverSuccess, + Columns: []schema.Column{ + { + Name: "test_column", + Type: arrow.PrimitiveTypes.Int64, + }, + }, + Relations: []*schema.Table{ + testTableSuccess(), + }, + } +} + +type syncTestCase struct { + table *schema.Table + data []scalar.Vector + deterministicCQID bool +} + +var syncTestCases = []syncTestCase{ + { + table: testTableSuccess(), + data: []scalar.Vector{ + { + &scalar.Int64{Value: 3, Valid: true}, + }, + }, + }, + { + table: testTableResolverPanic(), + data: nil, + }, + { + table: testTablePreResourceResolverPanic(), + data: nil, + }, + + { + table: testTableRelationSuccess(), + data: []scalar.Vector{ + { + &scalar.Int64{Value: 3, Valid: true}, + }, + { + &scalar.Int64{Value: 3, Valid: true}, + }, + }, + }, + { + table: testTableSuccess(), + data: []scalar.Vector{ + { + // &scalar.String{Value: "testSource", Valid: true}, + // &scalar.Timestamp{Value: testSyncTime, Valid: true}, + // &scalar.UUID{Value: randomStableUUID, Valid: true}, + // &scalar.UUID{}, + &scalar.Int64{Value: 3, Valid: true}, + }, + }, + deterministicCQID: true, + }, + { + table: testTableColumnResolverPanic(), + data: []scalar.Vector{ + { + &scalar.Int64{Value: 3, Valid: true}, + &scalar.Int64{}, + }, + }, + // deterministicCQID: true, + }, + { + table: testTableRelationSuccess(), + data: []scalar.Vector{ + { + // &scalar.String{Value: "testSource", Valid: true}, + // &scalar.Timestamp{Value: testSyncTime, Valid: true}, + // &scalar.UUID{Value: randomStableUUID, Valid: true}, + // &scalar.UUID{}, + &scalar.Int64{Value: 3, Valid: true}, + }, + { + // &scalar.String{Value: "testSource", Valid: true}, + // &scalar.Timestamp{Value: testSyncTime, Valid: true}, + // &scalar.UUID{Value: randomStableUUID, Valid: true}, + // &scalar.UUID{Value: randomStableUUID, Valid: true}, + &scalar.Int64{Value: 3, Valid: true}, + }, + }, + // deterministicCQID: true, + }, + { + table: testTableSuccessWithPK(), + data: []scalar.Vector{ + { + // &scalar.String{Value: "testSource", Valid: true}, + // &scalar.Timestamp{Value: testSyncTime, Valid: true}, + // &scalar.UUID{Value: deterministicStableUUID, Valid: true}, + // &scalar.UUID{}, + &scalar.Int64{Value: 3, Valid: true}, + }, + }, + // deterministicCQID: true, + }, +} + +func TestScheduler(t *testing.T) { + // uuid.SetRand(testRand{}) + for _, scheduler := range AllSchedulers { + for _, tc := range syncTestCases { + tc := tc + tc.table = tc.table.Copy(nil) + t.Run(tc.table.Name+"_"+scheduler.String(), func(t *testing.T) { + testSyncTable(t, tc, scheduler, tc.deterministicCQID) + }) + } + } +} + +func testSyncTable(t *testing.T, tc syncTestCase, strategy SchedulerStrategy, deterministicCQID bool) { + ctx := context.Background() + tables := []*schema.Table{ + tc.table, + } + c := testExecutionClient{} + opts := []Option{ + WithLogger(zerolog.New(zerolog.NewTestWriter(t))), + WithSchedulerStrategy(strategy), + // WithDeterministicCQId(deterministicCQID), + } + sc := NewScheduler(tables, &c, opts...) + records := make(chan arrow.Record, 10) + if err := sc.Sync(ctx, records); err != nil { + t.Fatal(err) + } + close(records) + + var i int + for record := range records { + if tc.data == nil { + t.Fatalf("Unexpected resource %v", record) + } + if i >= len(tc.data) { + t.Fatalf("expected %d resources. got %d", len(tc.data), i) + } + rec := tc.data[i].ToArrowRecord(record.Schema()) + if !array.RecordEqual(rec, record) { + t.Fatalf("expected at i=%d: %v. got %v", i, tc.data[i], record) + } + i++ + } + if len(tc.data) != i { + t.Fatalf("expected %d resources. got %d", len(tc.data), i) + } +} diff --git a/schema/resource.go b/schema/resource.go index e9d1f07da3..e55c31c262 100644 --- a/schema/resource.go +++ b/schema/resource.go @@ -97,6 +97,11 @@ func (r *Resource) CalculateCQID(deterministicCQID bool) error { } func (r *Resource) storeCQID(value uuid.UUID) error { + // We skeep if _cq_id is not present. + // Mostly the problem here is because the transformaiton step is baked into the the resolving step + if r.Table.Columns.Get(CqIDColumn.Name) == nil { + return nil + } b, err := value.MarshalBinary() if err != nil { return err diff --git a/serve/docs_test.go b/serve/docs_test.go index 9b65230168..296c9d438e 100644 --- a/serve/docs_test.go +++ b/serve/docs_test.go @@ -1 +1,20 @@ package serve + +import ( + "testing" + + "github.com/cloudquery/plugin-sdk/v4/internal/memdb" + "github.com/cloudquery/plugin-sdk/v4/plugin" +) + +func TestPluginDocs(t *testing.T) { + tmpDir := t.TempDir() + p := plugin.NewPlugin( + "testPlugin", + "v1.0.0", + memdb.NewMemDBClient) + srv := Plugin(p, WithArgs("doc", tmpDir), WithTestListener()) + if err := srv.newCmdPluginDoc().Execute(); err != nil { + t.Fatal(err) + } +} diff --git a/serve/plugin_test.go b/serve/plugin_test.go index e09308b704..d5357d1cb5 100644 --- a/serve/plugin_test.go +++ b/serve/plugin_test.go @@ -3,7 +3,6 @@ package serve import ( "bytes" "context" - "encoding/json" "io" "sync" "testing" @@ -11,67 +10,18 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/ipc" pb "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" + "github.com/cloudquery/plugin-sdk/v4/internal/memdb" "github.com/cloudquery/plugin-sdk/v4/plugin" "github.com/cloudquery/plugin-sdk/v4/schema" - "github.com/rs/zerolog" "google.golang.org/grpc" "google.golang.org/grpc/credentials/insecure" ) -type TestSourcePluginSpec struct { - Accounts []string `json:"accounts,omitempty" yaml:"accounts,omitempty"` -} - -type testExecutionClient struct { - plugin.UnimplementedSync - plugin.UnimplementedWriter - plugin.UnimplementedRead -} - -var _ schema.ClientMeta = &testExecutionClient{} - -// var errTestExecutionClientErr = fmt.Errorf("error in newTestExecutionClientErr") - -func testTable(name string) *schema.Table { - return &schema.Table{ - Name: name, - Resolver: func(ctx context.Context, meta schema.ClientMeta, parent *schema.Resource, res chan<- any) error { - res <- map[string]any{ - "TestColumn": 3, - } - return nil - }, - Columns: []schema.Column{ - { - Name: "test_column", - Type: arrow.PrimitiveTypes.Int64, - }, - }, - } -} - -func (*testExecutionClient) ID() string { - return "testExecutionClient" -} - -func (*testExecutionClient) Close(ctx context.Context) error { - return nil -} - -func (c *testExecutionClient) NewManagedSyncClient(ctx context.Context, options plugin.SyncOptions) (plugin.ManagedSyncClient, error) { - return c, nil -} - -func newTestExecutionClient(context.Context, zerolog.Logger, any) (plugin.Client, error) { - return &testExecutionClient{}, nil -} - -func TestPlugin(t *testing.T) { +func TestPluginServe(t *testing.T) { p := plugin.NewPlugin( "testPlugin", "v1.0.0", - newTestExecutionClient, - plugin.WithStaticTables([]*schema.Table{testTable("test_table"), testTable("test_table2")})) + memdb.NewMemDBClient) srv := Plugin(p, WithArgs("serve"), WithTestListener()) ctx := context.Background() ctx, cancel := context.WithCancel(ctx) @@ -110,7 +60,7 @@ func TestPlugin(t *testing.T) { t.Fatalf("Expected version to be v1.0.0 but got %s", getVersionResponse.Version) } - getTablesRes, err := c.GetStaticTables(ctx, &pb.GetStaticTables_Request{}) + getTablesRes, err := c.GetTables(ctx, &pb.GetTables_Request{}) if err != nil { t.Fatal(err) } @@ -127,19 +77,6 @@ func TestPlugin(t *testing.T) { t.Fatal(err) } - getTablesForSpecRes, err := c.GetDynamicTables(ctx, &pb.GetDynamicTables_Request{}) - if err != nil { - t.Fatal(err) - } - tables, err = schema.NewTablesFromBytes(getTablesForSpecRes.Tables) - if err != nil { - t.Fatal(err) - } - - if len(tables) != 1 { - t.Fatalf("Expected 1 table but got %d", len(tables)) - } - syncClient, err := c.Sync(ctx, &pb.Sync_Request{}) if err != nil { t.Fatal(err) @@ -183,28 +120,6 @@ func TestPlugin(t *testing.T) { t.Fatalf("Expected 1 resource on channel but got %d", totalResources) } - getMetricsRes, err := c.GetMetrics(ctx, &pb.GetMetrics_Request{}) - if err != nil { - t.Fatal(err) - } - var stats plugin.Metrics - if err := json.Unmarshal(getMetricsRes.Metrics, &stats); err != nil { - t.Fatal(err) - } - - clientStats := stats.TableClient[""][""] - if clientStats.Resources != 1 { - t.Fatalf("Expected 1 resource but got %d", clientStats.Resources) - } - - if clientStats.Errors != 0 { - t.Fatalf("Expected 0 errors but got %d", clientStats.Errors) - } - - if clientStats.Panics != 0 { - t.Fatalf("Expected 0 panics but got %d", clientStats.Panics) - } - cancel() wg.Wait() if serverErr != nil { diff --git a/serve/state_v3_test.go b/serve/state_v3_test.go.backup similarity index 100% rename from serve/state_v3_test.go rename to serve/state_v3_test.go.backup diff --git a/transformers/tables.go b/transformers/tables.go index 99b563e2e5..9ffbc3dd1f 100644 --- a/transformers/tables.go +++ b/transformers/tables.go @@ -1,7 +1,6 @@ package transformers import ( - "context" "fmt" "github.com/cloudquery/plugin-sdk/v4/schema" @@ -15,33 +14,6 @@ func setParents(tables schema.Tables, parent *schema.Table) { } } -// Add internal columns -func AddInternalColumns(tables []*schema.Table) error { - for _, table := range tables { - if c := table.Column("_cq_id"); c != nil { - return fmt.Errorf("table %s already has column _cq_id", table.Name) - } - cqID := schema.CqIDColumn - if len(table.PrimaryKeys()) == 0 { - cqID.PrimaryKey = true - } - cqSourceName := schema.CqSourceNameColumn - cqSyncTime := schema.CqSyncTimeColumn - cqSourceName.Resolver = func(_ context.Context, _ schema.ClientMeta, resource *schema.Resource, c schema.Column) error { - return resource.Set(c.Name, p.sourceName) - } - cqSyncTime.Resolver = func(_ context.Context, _ schema.ClientMeta, resource *schema.Resource, c schema.Column) error { - return resource.Set(c.Name, p.syncTime) - } - - table.Columns = append([]schema.Column{cqSourceName, cqSyncTime, cqID, schema.CqParentIDColumn}, table.Columns...) - if err := AddInternalColumns(table.Relations); err != nil { - return err - } - } - return nil -} - // Apply transformations to tables func TransformTables(tables schema.Tables) error { for _, table := range tables { From a10fd2c37d280c991f6e789df195d7b5494f937d Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Mon, 12 Jun 2023 23:05:35 +0300 Subject: [PATCH 066/125] more wip --- internal/memdb/memdb.go | 113 +++++---- internal/memdb/memdb_test.go | 210 +++++++--------- .../servers/destination/v0/destinations.go | 109 ++++---- .../servers/destination/v1/destinations.go | 87 ++++--- internal/servers/plugin/v3/plugin.go | 232 ++++++++---------- internal/servers/plugin/v3/state.go | 46 ++-- plugin/messages.go | 79 ++---- plugin/options.go | 16 -- plugin/plugin.go | 6 + plugin/plugin_reader.go | 17 +- plugin/plugin_test.go | 91 +++---- plugin/plugin_writer.go | 4 +- ...testing_sync.go => testing_sync.go.backup} | 0 plugin/testing_upsert.go | 11 +- plugin/testing_write.go | 54 ++-- plugin/testing_write_delete.go | 32 +-- plugin/testing_write_insert.go | 13 +- plugin/testing_write_migrate.go | 12 +- plugin/testing_write_upsert.go | 69 ------ scheduler/scheduler.go | 50 ++-- scheduler/scheduler_dfs.go | 2 +- scheduler/scheduler_round_robin.go | 2 +- scheduler/scheduler_test.go | 29 +-- schema/arrow.go | 37 ++- schema/table.go | 19 ++ serve/destination_v0_test.go | 18 +- serve/destination_v1_test.go | 21 +- serve/plugin_test.go | 65 ++++- transformers/tables.go | 4 +- writers/batch_test.go | 2 +- 30 files changed, 715 insertions(+), 735 deletions(-) rename plugin/{testing_sync.go => testing_sync.go.backup} (100%) delete mode 100644 plugin/testing_write_upsert.go diff --git a/internal/memdb/memdb.go b/internal/memdb/memdb.go index a23316939b..ca202a82be 100644 --- a/internal/memdb/memdb.go +++ b/internal/memdb/memdb.go @@ -4,7 +4,6 @@ import ( "context" "fmt" "sync" - "time" "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/array" @@ -22,21 +21,21 @@ type client struct { blockingWrite bool } -type MemDBOption func(*client) +type Option func(*client) -func WithErrOnWrite() MemDBOption { +func WithErrOnWrite() Option { return func(c *client) { c.errOnWrite = true } } -func WithBlockingWrite() MemDBOption { +func WithBlockingWrite() Option { return func(c *client) { c.blockingWrite = true } } -func GetNewClient(options ...MemDBOption) plugin.NewClientFunc { +func GetNewClient(options ...Option) plugin.NewClientFunc { c := &client{ memoryDB: make(map[string][]arrow.Record), memoryDBLock: sync.RWMutex{}, @@ -56,7 +55,7 @@ func NewMemDBClient(_ context.Context, _ zerolog.Logger, spec any) (plugin.Clien }, nil } -func NewMemDBClientErrOnNew(context.Context, zerolog.Logger, []byte) (plugin.Client, error) { +func NewMemDBClientErrOnNew(context.Context, zerolog.Logger, any) (plugin.Client, error) { return nil, fmt.Errorf("newTestDestinationMemDBClientErrOnNew") } @@ -85,11 +84,18 @@ func (c *client) ID() string { return "testDestinationMemDB" } -func (c *client) Sync(ctx context.Context, options plugin.SyncOptions, res chan<- arrow.Record) error { +func (c *client) Sync(ctx context.Context, options plugin.SyncOptions, res chan<- plugin.Message) error { c.memoryDBLock.RLock() + for tableName := range c.memoryDB { + if !plugin.IsTable(tableName, options.Tables, options.SkipTables) { + continue + } for _, row := range c.memoryDB[tableName] { - res <- row + res <- &plugin.MessageInsert{ + Record: row, + Upsert: false, + } } } c.memoryDBLock.RUnlock() @@ -104,28 +110,25 @@ func (c *client) Tables(ctx context.Context) (schema.Tables, error) { return tables, nil } -func (c *client) Migrate(_ context.Context, tables schema.Tables, options plugin.MigrateOptions) error { - for _, table := range tables { - tableName := table.Name - memTable := c.memoryDB[tableName] - if memTable == nil { - c.memoryDB[tableName] = make([]arrow.Record, 0) - c.tables[tableName] = table - continue - } - - changes := table.GetChanges(c.tables[tableName]) - // memdb doesn't support any auto-migrate - if changes == nil { - continue - } +func (c *client) migrate(_ context.Context, table *schema.Table) { + tableName := table.Name + memTable := c.memoryDB[tableName] + if memTable == nil { c.memoryDB[tableName] = make([]arrow.Record, 0) c.tables[tableName] = table + return } - return nil + + changes := table.GetChanges(c.tables[tableName]) + // memdb doesn't support any auto-migrate + if changes == nil { + return + } + c.memoryDB[tableName] = make([]arrow.Record, 0) + c.tables[tableName] = table } -func (c *client) Write(ctx context.Context, options plugin.WriteOptions, resources <-chan arrow.Record) error { +func (c *client) Write(ctx context.Context, options plugin.WriteOptions, msgs <-chan plugin.Message) error { if c.errOnWrite { return fmt.Errorf("errOnWrite") } @@ -137,19 +140,28 @@ func (c *client) Write(ctx context.Context, options plugin.WriteOptions, resourc return nil } - for resource := range resources { + for msg := range msgs { c.memoryDBLock.Lock() - sc := resource.Schema() - tableName, ok := sc.Metadata().GetValue(schema.MetadataTableName) - if !ok { - return fmt.Errorf("table name not found in schema metadata") - } - table := c.tables[tableName] - if options.WriteMode == plugin.WriteModeAppend { - c.memoryDB[tableName] = append(c.memoryDB[tableName], resource) - } else { - c.overwrite(table, resource) + + switch msg := msg.(type) { + case *plugin.MessageCreateTable: + c.migrate(ctx, msg.Table) + case *plugin.MessageDeleteStale: + c.deleteStale(ctx, msg) + case *plugin.MessageInsert: + sc := msg.Record.Schema() + tableName, ok := sc.Metadata().GetValue(schema.MetadataTableName) + if !ok { + return fmt.Errorf("table name not found in schema metadata") + } + table := c.tables[tableName] + if msg.Upsert { + c.overwrite(table, msg.Record) + } else { + c.memoryDB[tableName] = append(c.memoryDB[tableName], msg.Record) + } } + c.memoryDBLock.Unlock() } return nil @@ -160,22 +172,25 @@ func (c *client) Close(context.Context) error { return nil } -func (c *client) DeleteStale(ctx context.Context, tables schema.Tables, source string, syncTime time.Time) error { - for _, table := range tables { - c.deleteStaleTable(ctx, table, source, syncTime) - } - return nil -} - -func (c *client) deleteStaleTable(_ context.Context, table *schema.Table, source string, syncTime time.Time) { - sourceColIndex := table.Columns.Index(schema.CqSourceNameColumn.Name) - syncColIndex := table.Columns.Index(schema.CqSyncTimeColumn.Name) - tableName := table.Name +func (c *client) deleteStale(_ context.Context, msg *plugin.MessageDeleteStale) { var filteredTable []arrow.Record + tableName := msg.Table.Name for i, row := range c.memoryDB[tableName] { - if row.Column(sourceColIndex).(*array.String).Value(0) == source { + sc := row.Schema() + indices := sc.FieldIndices(schema.CqSourceNameColumn.Name) + if len(indices) == 0 { + continue + } + sourceColIndex := indices[0] + indices = sc.FieldIndices(schema.CqSyncTimeColumn.Name) + if len(indices) == 0 { + continue + } + syncColIndex := indices[0] + + if row.Column(sourceColIndex).(*array.String).Value(0) == msg.SourceName { rowSyncTime := row.Column(syncColIndex).(*array.Timestamp).Value(0).ToTime(arrow.Microsecond).UTC() - if !rowSyncTime.Before(syncTime) { + if !rowSyncTime.Before(msg.SyncTime) { filteredTable = append(filteredTable, c.memoryDB[tableName][i]) } } diff --git a/internal/memdb/memdb_test.go b/internal/memdb/memdb_test.go index e04a23bd1a..44a95c6b06 100644 --- a/internal/memdb/memdb_test.go +++ b/internal/memdb/memdb_test.go @@ -3,137 +3,107 @@ package memdb import ( "context" "testing" - "time" - "github.com/apache/arrow/go/v13/arrow" - pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" "github.com/cloudquery/plugin-sdk/v4/plugin" - "github.com/cloudquery/plugin-sdk/v4/schema" - "github.com/google/uuid" ) -var migrateStrategyOverwrite = plugin.MigrateStrategy{ - AddColumn: plugin.MigrateModeForce, - AddColumnNotNull: plugin.MigrateModeForce, - RemoveColumn: plugin.MigrateModeForce, - RemoveColumnNotNull: plugin.MigrateModeForce, - ChangeColumn: plugin.MigrateModeForce, -} - -var migrateStrategyAppend = plugin.MigrateStrategy{ - AddColumn: plugin.MigrateModeForce, - AddColumnNotNull: plugin.MigrateModeForce, - RemoveColumn: plugin.MigrateModeForce, - RemoveColumnNotNull: plugin.MigrateModeForce, - ChangeColumn: plugin.MigrateModeForce, -} - -func TestPluginUnmanagedClient(t *testing.T) { +func TestPlugin(t *testing.T) { + ctx := context.Background() + p := plugin.NewPlugin("test", "development", NewMemDBClient) + if err := p.Init(ctx, nil); err != nil { + t.Fatal(err) + } plugin.PluginTestSuiteRunner( t, - func() *plugin.Plugin { - return plugin.NewPlugin("test", "development", NewMemDBClient) - }, - nil, + p, plugin.PluginTestSuiteTests{ - MigrateStrategyOverwrite: migrateStrategyOverwrite, - MigrateStrategyAppend: migrateStrategyAppend, - }, - ) -} - -func TestPluginManagedClientWithCQPKs(t *testing.T) { - plugin.PluginTestSuiteRunner(t, - func() *plugin.Plugin { - return plugin.NewPlugin("test", "development", NewMemDBClient) - }, - pbPlugin.Spec{ - WriteSpec: &pbPlugin.WriteSpec{ - PkMode: pbPlugin.WriteSpec_CQ_ID_ONLY, + MigrateStrategy: plugin.MigrateStrategy{ + AddColumn: plugin.MigrateModeForce, + AddColumnNotNull: plugin.MigrateModeForce, + RemoveColumn: plugin.MigrateModeForce, + RemoveColumnNotNull: plugin.MigrateModeForce, + ChangeColumn: plugin.MigrateModeForce, }, }, - plugin.PluginTestSuiteTests{ - MigrateStrategyOverwrite: migrateStrategyOverwrite, - MigrateStrategyAppend: migrateStrategyAppend, - }) + ) } -func TestPluginOnNewError(t *testing.T) { - ctx := context.Background() - p := plugin.NewPlugin("test", "development", NewMemDBClientErrOnNew) - err := p.Init(ctx, nil) +// func TestPluginOnNewError(t *testing.T) { +// ctx := context.Background() +// p := plugin.NewPlugin("test", "development", NewMemDBClientErrOnNew) +// err := p.Init(ctx, nil) - if err == nil { - t.Fatal("expected error") - } -} +// if err == nil { +// t.Fatal("expected error") +// } +// } -func TestOnWriteError(t *testing.T) { - ctx := context.Background() - newClientFunc := GetNewClient(WithErrOnWrite()) - p := plugin.NewPlugin("test", "development", newClientFunc) - if err := p.Init(ctx, nil); err != nil { - t.Fatal(err) - } - table := schema.TestTable("test", schema.TestSourceOptions{}) - tables := schema.Tables{ - table, - } - sourceName := "TestDestinationOnWriteError" - syncTime := time.Now() - sourceSpec := pbPlugin.Spec{ - Name: sourceName, - } - ch := make(chan arrow.Record, 1) - opts := schema.GenTestDataOptions{ - SourceName: "test", - SyncTime: time.Now(), - MaxRows: 1, - StableUUID: uuid.Nil, - } - record := schema.GenTestData(table, opts)[0] - ch <- record - close(ch) - err := p.Write(ctx, sourceSpec, tables, syncTime, ch) - if err == nil { - t.Fatal("expected error") - } - if err.Error() != "errOnWrite" { - t.Fatalf("expected errOnWrite, got %s", err.Error()) - } -} +// func TestOnWriteError(t *testing.T) { +// ctx := context.Background() +// newClientFunc := GetNewClient(WithErrOnWrite()) +// p := plugin.NewPlugin("test", "development", newClientFunc) +// if err := p.Init(ctx, nil); err != nil { +// t.Fatal(err) +// } +// table := schema.TestTable("test", schema.TestSourceOptions{}) +// tables := schema.Tables{ +// table, +// } +// sourceName := "TestDestinationOnWriteError" +// syncTime := time.Now() +// sourceSpec := pbPlugin.Spec{ +// Name: sourceName, +// } +// ch := make(chan arrow.Record, 1) +// opts := schema.GenTestDataOptions{ +// SourceName: "test", +// SyncTime: time.Now(), +// MaxRows: 1, +// StableUUID: uuid.Nil, +// } +// record := schema.GenTestData(table, opts)[0] +// ch <- record +// close(ch) +// err := p.Write(ctx, sourceSpec, tables, syncTime, ch) +// if err == nil { +// t.Fatal("expected error") +// } +// if err.Error() != "errOnWrite" { +// t.Fatalf("expected errOnWrite, got %s", err.Error()) +// } +// } -func TestOnWriteCtxCancelled(t *testing.T) { - ctx := context.Background() - newClientFunc := GetNewClient(WithBlockingWrite()) - p := plugin.NewPlugin("test", "development", newClientFunc) - if err := p.Init(ctx, pbPlugin.Spec{ - WriteSpec: &pbPlugin.WriteSpec{}, - }); err != nil { - t.Fatal(err) - } - table := schema.TestTable("test", schema.TestSourceOptions{}) - tables := schema.Tables{ - table, - } - sourceName := "TestDestinationOnWriteError" - syncTime := time.Now() - sourceSpec := pbPlugin.Spec{ - Name: sourceName, - } - ch := make(chan arrow.Record, 1) - ctx, cancel := context.WithTimeout(ctx, 2*time.Second) - opts := schema.GenTestDataOptions{ - SourceName: "test", - SyncTime: time.Now(), - MaxRows: 1, - StableUUID: uuid.Nil, - } - record := schema.GenTestData(table, opts)[0] - ch <- record - defer cancel() - err := p.Write(ctx, sourceSpec, tables, syncTime, ch) - if err != nil { - t.Fatal(err) - } -} +// func TestOnWriteCtxCancelled(t *testing.T) { +// ctx := context.Background() +// newClientFunc := GetNewClient(WithBlockingWrite()) +// p := plugin.NewPlugin("test", "development", newClientFunc) +// if err := p.Init(ctx, pbPlugin.Spec{ +// WriteSpec: &pbPlugin.WriteSpec{}, +// }); err != nil { +// t.Fatal(err) +// } +// table := schema.TestTable("test", schema.TestSourceOptions{}) +// tables := schema.Tables{ +// table, +// } +// sourceName := "TestDestinationOnWriteError" +// syncTime := time.Now() +// sourceSpec := pbPlugin.Spec{ +// Name: sourceName, +// } +// ch := make(chan arrow.Record, 1) +// ctx, cancel := context.WithTimeout(ctx, 2*time.Second) +// opts := schema.GenTestDataOptions{ +// SourceName: "test", +// SyncTime: time.Now(), +// MaxRows: 1, +// StableUUID: uuid.Nil, +// } +// record := schema.GenTestData(table, opts)[0] +// ch <- record +// defer cancel() +// err := p.Write(ctx, sourceSpec, tables, syncTime, ch) +// if err != nil { +// t.Fatal(err) +// } +// } diff --git a/internal/servers/destination/v0/destinations.go b/internal/servers/destination/v0/destinations.go index 4c22750e69..ad5506e161 100644 --- a/internal/servers/destination/v0/destinations.go +++ b/internal/servers/destination/v0/destinations.go @@ -4,8 +4,9 @@ import ( "context" "encoding/json" "io" + "sync" - "github.com/apache/arrow/go/v13/arrow" + "github.com/apache/arrow/go/v13/arrow/array" "github.com/apache/arrow/go/v13/arrow/memory" pbBase "github.com/cloudquery/plugin-pb-go/pb/base/v0" pb "github.com/cloudquery/plugin-pb-go/pb/destination/v0" @@ -21,11 +22,9 @@ import ( type Server struct { pb.UnimplementedDestinationServer - Plugin *plugin.Plugin - Logger zerolog.Logger - spec specs.Destination - writeMode plugin.WriteMode - migrateMode plugin.MigrateMode + Plugin *plugin.Plugin + Logger zerolog.Logger + spec specs.Destination } func (*Server) GetProtocolVersion(context.Context, *pbBase.GetProtocolVersion_Request) (*pbBase.GetProtocolVersion_Response, error) { @@ -40,20 +39,6 @@ func (s *Server) Configure(ctx context.Context, req *pbBase.Configure_Request) ( return nil, status.Errorf(codes.InvalidArgument, "failed to unmarshal spec: %v", err) } s.spec = spec - switch s.spec.WriteMode { - case specs.WriteModeAppend: - s.writeMode = plugin.WriteModeAppend - case specs.WriteModeOverwrite: - s.writeMode = plugin.WriteModeOverwrite - case specs.WriteModeOverwriteDeleteStale: - s.writeMode = plugin.WriteModeOverwriteDeleteStale - } - switch s.spec.MigrateMode { - case specs.MigrateModeSafe: - s.migrateMode = plugin.MigrateModeSafe - case specs.MigrateModeForced: - s.migrateMode = plugin.MigrateModeForce - } return &pbBase.Configure_Response{}, s.Plugin.Init(ctx, nil) } @@ -77,17 +62,22 @@ func (s *Server) Migrate(ctx context.Context, req *pb.Migrate_Request) (*pb.Migr tables := TablesV2ToV3(tablesV2).FlattenTables() SetDestinationManagedCqColumns(tables) s.setPKsForTables(tables) - - var migrateMode plugin.MigrateMode - switch s.spec.MigrateMode { - case specs.MigrateModeSafe: - migrateMode = plugin.MigrateModeSafe - case specs.MigrateModeForced: - migrateMode = plugin.MigrateModeForce - default: - return nil, status.Errorf(codes.InvalidArgument, "invalid migrate mode: %v", s.spec.MigrateMode) + writeCh := make(chan plugin.Message) + eg, ctx := errgroup.WithContext(ctx) + eg.Go(func() error { + return s.Plugin.Write(ctx, plugin.WriteOptions{}, writeCh) + }) + for _, table := range tables { + writeCh <- &plugin.MessageCreateTable{ + Table: table, + MigrateForce: s.spec.MigrateMode == specs.MigrateModeForced, + } + } + close(writeCh) + if err := eg.Wait(); err != nil { + return nil, status.Errorf(codes.Internal, "failed to write: %v", err) } - return &pb.Migrate_Response{}, s.Plugin.Migrate(ctx, tables, migrateMode) + return &pb.Migrate_Response{}, nil } func (*Server) Write(pb.Destination_WriteServer) error { @@ -97,7 +87,7 @@ func (*Server) Write(pb.Destination_WriteServer) error { // Note the order of operations in this method is important! // Trying to insert into the `resources` channel before starting the reader goroutine will cause a deadlock. func (s *Server) Write2(msg pb.Destination_Write2Server) error { - resources := make(chan arrow.Record) + msgs := make(chan plugin.Message) r, err := msg.Recv() if err != nil { @@ -126,10 +116,18 @@ func (s *Server) Write2(msg pb.Destination_Write2Server) error { SetDestinationManagedCqColumns(tables) s.setPKsForTables(tables) eg, ctx := errgroup.WithContext(msg.Context()) - sourceName := r.Source + // sourceName := r.Source eg.Go(func() error { - return s.Plugin.Write(ctx, sourceName, tables, syncTime, s.writeMode, resources) + return s.Plugin.Write(ctx, plugin.WriteOptions{}, msgs) }) + + for _, table := range tables { + msgs <- &plugin.MessageCreateTable{ + Table: table, + MigrateForce: s.spec.MigrateMode == specs.MigrateModeForced, + } + } + sourceColumn := &schemav2.Text{} _ = sourceColumn.Set(sourceSpec.Name) syncTimeColumn := &schemav2.Timestamptz{} @@ -138,30 +136,32 @@ func (s *Server) Write2(msg pb.Destination_Write2Server) error { for { r, err := msg.Recv() if err == io.EOF { - close(resources) + close(msgs) if err := eg.Wait(); err != nil { return status.Errorf(codes.Internal, "write failed: %v", err) } return msg.SendAndClose(&pb.Write2_Response{}) } if err != nil { - close(resources) + close(msgs) if wgErr := eg.Wait(); wgErr != nil { return status.Errorf(codes.Internal, "failed to receive msg: %v and write failed: %v", err, wgErr) } return status.Errorf(codes.Internal, "failed to receive msg: %v", err) } + var origResource schemav2.DestinationResource if err := json.Unmarshal(r.Resource, &origResource); err != nil { - close(resources) + close(msgs) if wgErr := eg.Wait(); wgErr != nil { return status.Errorf(codes.InvalidArgument, "failed to unmarshal resource: %v and write failed: %v", err, wgErr) } return status.Errorf(codes.InvalidArgument, "failed to unmarshal resource: %v", err) } + table := tables.Get(origResource.TableName) if table == nil { - close(resources) + close(msgs) if wgErr := eg.Wait(); wgErr != nil { return status.Errorf(codes.InvalidArgument, "failed to get table: %s and write failed: %v", origResource.TableName, wgErr) } @@ -173,11 +173,15 @@ func (s *Server) Write2(msg pb.Destination_Write2Server) error { origResource.Data = append([]schemav2.CQType{sourceColumn, syncTimeColumn}, origResource.Data...) } convertedResource := CQTypesToRecord(memory.DefaultAllocator, []schemav2.CQTypes{origResource.Data}, table.ToArrowSchema()) + msg := &plugin.MessageInsert{ + Record: convertedResource, + Upsert: s.spec.WriteMode == specs.WriteModeOverwrite || s.spec.WriteMode == specs.WriteModeOverwriteDeleteStale, + } + select { - case resources <- convertedResource: + case msgs <- msg: case <-ctx.Done(): - convertedResource.Release() - close(resources) + close(msgs) if err := eg.Wait(); err != nil { return status.Errorf(codes.Internal, "Context done: %v and failed to wait for plugin: %v", ctx.Err(), err) } @@ -228,11 +232,28 @@ func (s *Server) DeleteStale(ctx context.Context, req *pb.DeleteStale_Request) ( } tables := TablesV2ToV3(tablesV2).FlattenTables() SetDestinationManagedCqColumns(tables) - if err := s.Plugin.DeleteStale(ctx, tables, req.Source, req.Timestamp.AsTime()); err != nil { - return nil, err - } - return &pb.DeleteStale_Response{}, nil + msgs := make(chan plugin.Message) + var writeErr error + var wg sync.WaitGroup + wg.Add(1) + go func() { + defer wg.Done() + writeErr = s.Plugin.Write(ctx, plugin.WriteOptions{}, msgs) + }() + for _, table := range tables { + bldr := array.NewRecordBuilder(memory.DefaultAllocator, table.ToArrowSchema()) + bldr.Field(table.Columns.Index(schema.CqSourceNameColumn.Name)).(*array.StringBuilder).Append(req.Source) + bldr.Field(table.Columns.Index(schema.CqSyncTimeColumn.Name)).(*array.TimestampBuilder).AppendTime(req.Timestamp.AsTime()) + msgs <- &plugin.MessageDeleteStale{ + Table: table, + SourceName: req.Source, + SyncTime: req.Timestamp.AsTime(), + } + } + close(msgs) + wg.Wait() + return &pb.DeleteStale_Response{}, writeErr } func (s *Server) setPKsForTables(tables schema.Tables) { diff --git a/internal/servers/destination/v1/destinations.go b/internal/servers/destination/v1/destinations.go index 45cfa4f7f7..0bfdb886ca 100644 --- a/internal/servers/destination/v1/destinations.go +++ b/internal/servers/destination/v1/destinations.go @@ -5,9 +5,11 @@ import ( "context" "encoding/json" "io" + "sync" - "github.com/apache/arrow/go/v13/arrow" + "github.com/apache/arrow/go/v13/arrow/array" "github.com/apache/arrow/go/v13/arrow/ipc" + "github.com/apache/arrow/go/v13/arrow/memory" pb "github.com/cloudquery/plugin-pb-go/pb/destination/v1" "github.com/cloudquery/plugin-pb-go/specs/v0" "github.com/cloudquery/plugin-sdk/v4/plugin" @@ -23,7 +25,6 @@ type Server struct { Plugin *plugin.Plugin Logger zerolog.Logger spec specs.Destination - writeMode plugin.WriteMode migrateMode plugin.MigrateMode } @@ -33,20 +34,6 @@ func (s *Server) Configure(ctx context.Context, req *pb.Configure_Request) (*pb. return nil, status.Errorf(codes.InvalidArgument, "failed to unmarshal spec: %v", err) } s.spec = spec - switch s.spec.WriteMode { - case specs.WriteModeAppend: - s.writeMode = plugin.WriteModeAppend - case specs.WriteModeOverwrite: - s.writeMode = plugin.WriteModeOverwrite - case specs.WriteModeOverwriteDeleteStale: - s.writeMode = plugin.WriteModeOverwriteDeleteStale - } - switch s.spec.MigrateMode { - case specs.MigrateModeSafe: - s.migrateMode = plugin.MigrateModeSafe - case specs.MigrateModeForced: - s.migrateMode = plugin.MigrateModeForce - } return &pb.Configure_Response{}, s.Plugin.Init(ctx, s.spec.Spec) } @@ -73,13 +60,28 @@ func (s *Server) Migrate(ctx context.Context, req *pb.Migrate_Request) (*pb.Migr } s.setPKsForTables(tables) - return &pb.Migrate_Response{}, s.Plugin.Migrate(ctx, tables, s.migrateMode) + writeCh := make(chan plugin.Message) + eg, ctx := errgroup.WithContext(ctx) + eg.Go(func() error { + return s.Plugin.Write(ctx, plugin.WriteOptions{}, writeCh) + }) + for _, table := range tables { + writeCh <- &plugin.MessageCreateTable{ + Table: table, + MigrateForce: s.migrateMode == plugin.MigrateModeForce, + } + } + close(writeCh) + if err := eg.Wait(); err != nil { + return nil, status.Errorf(codes.Internal, "failed to write: %v", err) + } + return &pb.Migrate_Response{}, nil } // Note the order of operations in this method is important! // Trying to insert into the `resources` channel before starting the reader goroutine will cause a deadlock. func (s *Server) Write(msg pb.Destination_WriteServer) error { - resources := make(chan arrow.Record) + msgs := make(chan plugin.Message) r, err := msg.Recv() if err != nil { @@ -108,26 +110,31 @@ func (s *Server) Write(msg pb.Destination_WriteServer) error { return status.Errorf(codes.InvalidArgument, "failed to unmarshal source spec: %v", err) } } - syncTime := r.Timestamp.AsTime() s.setPKsForTables(tables) eg, ctx := errgroup.WithContext(msg.Context()) - sourceName := r.Source eg.Go(func() error { - return s.Plugin.Write(ctx, sourceName, tables, syncTime, s.writeMode, resources) + return s.Plugin.Write(ctx, plugin.WriteOptions{}, msgs) }) + for _, table := range tables { + msgs <- &plugin.MessageCreateTable{ + Table: table, + MigrateForce: s.spec.MigrateMode == specs.MigrateModeForced, + } + } + for { r, err := msg.Recv() if err == io.EOF { - close(resources) + close(msgs) if err := eg.Wait(); err != nil { return status.Errorf(codes.Internal, "write failed: %v", err) } return msg.SendAndClose(&pb.Write_Response{}) } if err != nil { - close(resources) + close(msgs) if wgErr := eg.Wait(); wgErr != nil { return status.Errorf(codes.Internal, "failed to receive msg: %v and write failed: %v", err, wgErr) } @@ -135,7 +142,7 @@ func (s *Server) Write(msg pb.Destination_WriteServer) error { } rdr, err := ipc.NewReader(bytes.NewReader(r.Resource)) if err != nil { - close(resources) + close(msgs) if wgErr := eg.Wait(); wgErr != nil { return status.Errorf(codes.InvalidArgument, "failed to create reader: %v and write failed: %v", err, wgErr) } @@ -144,10 +151,14 @@ func (s *Server) Write(msg pb.Destination_WriteServer) error { for rdr.Next() { rec := rdr.Record() rec.Retain() + msg := &plugin.MessageInsert{ + Record: rec, + Upsert: s.spec.WriteMode == specs.WriteModeOverwrite || s.spec.WriteMode == specs.WriteModeOverwriteDeleteStale, + } select { - case resources <- rec: + case msgs <- msg: case <-ctx.Done(): - close(resources) + close(msgs) if err := eg.Wait(); err != nil { return status.Errorf(codes.Internal, "Context done: %v and failed to wait for plugin: %v", ctx.Err(), err) } @@ -190,11 +201,27 @@ func (s *Server) DeleteStale(ctx context.Context, req *pb.DeleteStale_Request) ( return nil, status.Errorf(codes.InvalidArgument, "failed to create tables: %v", err) } - if err := s.Plugin.DeleteStale(ctx, tables, req.Source, req.Timestamp.AsTime()); err != nil { - return nil, err + msgs := make(chan plugin.Message) + var writeErr error + var wg sync.WaitGroup + wg.Add(1) + go func() { + defer wg.Done() + writeErr = s.Plugin.Write(ctx, plugin.WriteOptions{}, msgs) + }() + for _, table := range tables { + bldr := array.NewRecordBuilder(memory.DefaultAllocator, table.ToArrowSchema()) + bldr.Field(table.Columns.Index(schema.CqSourceNameColumn.Name)).(*array.StringBuilder).Append(req.Source) + bldr.Field(table.Columns.Index(schema.CqSyncTimeColumn.Name)).(*array.TimestampBuilder).AppendTime(req.Timestamp.AsTime()) + msgs <- &plugin.MessageDeleteStale{ + Table: table, + SourceName: req.Source, + SyncTime: req.Timestamp.AsTime(), + } } - - return &pb.DeleteStale_Response{}, nil + close(msgs) + wg.Wait() + return &pb.DeleteStale_Response{}, writeErr } func (s *Server) setPKsForTables(tables schema.Tables) { diff --git a/internal/servers/plugin/v3/plugin.go b/internal/servers/plugin/v3/plugin.go index 92c7c27cc1..64e166e9ec 100644 --- a/internal/servers/plugin/v3/plugin.go +++ b/internal/servers/plugin/v3/plugin.go @@ -1,14 +1,12 @@ package plugin import ( - "bytes" "context" "errors" "fmt" "io" "github.com/apache/arrow/go/v13/arrow" - "github.com/apache/arrow/go/v13/arrow/ipc" "github.com/cloudquery/plugin-pb-go/managedplugin" pb "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" "github.com/cloudquery/plugin-sdk/v4/plugin" @@ -19,6 +17,7 @@ import ( "google.golang.org/grpc/codes" "google.golang.org/grpc/status" "google.golang.org/protobuf/proto" + "google.golang.org/protobuf/types/known/timestamppb" ) const MaxMsgSize = 100 * 1024 * 1024 // 100 MiB @@ -31,9 +30,12 @@ type Server struct { NoSentry bool } -func (s *Server) GetTables(context.Context, *pb.GetTables_Request) (*pb.GetTables_Response, error) { - tables := s.Plugin.Tables().ToArrowSchemas() - encoded, err := tables.Encode() +func (s *Server) GetTables(ctx context.Context, _ *pb.GetTables_Request) (*pb.GetTables_Response, error) { + tables, err := s.Plugin.Tables(ctx) + if err != nil { + return nil, status.Errorf(codes.Internal, "failed to get tables: %v", err) + } + encoded, err := tables.ToArrowSchemas().Encode() if err != nil { return nil, fmt.Errorf("failed to encode tables: %w", err) } @@ -62,7 +64,7 @@ func (s *Server) Init(ctx context.Context, req *pb.Init_Request) (*pb.Init_Respo } func (s *Server) Sync(req *pb.Sync_Request, stream pb.Plugin_SyncServer) error { - records := make(chan arrow.Record) + msgs := make(chan plugin.Message) var syncErr error ctx := stream.Context() @@ -72,8 +74,6 @@ func (s *Server) Sync(req *pb.Sync_Request, stream pb.Plugin_SyncServer) error { Concurrency: req.Concurrency, } - // sourceName := req.SourceName - if req.StateBackend != nil { opts := []managedplugin.Option{ managedplugin.WithLogger(s.Logger), @@ -90,51 +90,69 @@ func (s *Server) Sync(req *pb.Sync_Request, stream pb.Plugin_SyncServer) error { if err != nil { return status.Errorf(codes.Internal, "failed to create state plugin: %v", err) } - stateClient, err := newStateClient(ctx, statePlugin.Conn, *req.StateBackend) + stateClient, err := newStateClient(ctx, statePlugin.Conn, req.StateBackend) if err != nil { return status.Errorf(codes.Internal, "failed to create state client: %v", err) } syncOptions.StateBackend = stateClient } - if req.SyncTime != nil { - syncOptions.SyncTime = req.SyncTime.AsTime() - } - - if req.SourceName != "" { - syncOptions.SourceName = req.SourceName - } go func() { - defer close(records) - err := s.Plugin.Sync(ctx, syncOptions, records) + defer close(msgs) + err := s.Plugin.Sync(ctx, syncOptions, msgs) if err != nil { syncErr = fmt.Errorf("failed to sync records: %w", err) } }() - for rec := range records { - var buf bytes.Buffer - w := ipc.NewWriter(&buf, ipc.WithSchema(rec.Schema())) - if err := w.Write(rec); err != nil { - return status.Errorf(codes.Internal, "failed to write record: %v", err) - } - if err := w.Close(); err != nil { - return status.Errorf(codes.Internal, "failed to close writer: %v", err) + pbMsg := &pb.Sync_Response{} + for msg := range msgs { + switch m := msg.(type) { + case *plugin.MessageCreateTable: + m.Table.ToArrowSchema() + pbMsg.Message = &pb.Sync_Response_CreateTable{ + CreateTable: &pb.MessageCreateTable{ + Table: nil, + MigrateForce: m.MigrateForce, + }, + } + case *plugin.MessageInsert: + recordBytes, err := schema.RecordToBytes(m.Record) + if err != nil { + return status.Errorf(codes.Internal, "failed to encode record: %v", err) + } + pbMsg.Message = &pb.Sync_Response_Insert{ + Insert: &pb.MessageInsert{ + Record: recordBytes, + Upsert: m.Upsert, + }, + } + case *plugin.MessageDeleteStale: + tableBytes, err := m.Table.ToArrowSchemaBytes() + if err != nil { + return status.Errorf(codes.Internal, "failed to encode record: %v", err) + } + pbMsg.Message = &pb.Sync_Response_Delete{ + Delete: &pb.MessageDeleteStale{ + Table: tableBytes, + SourceName: m.SourceName, + SyncTime: timestamppb.New(m.SyncTime), + }, + } + default: + return status.Errorf(codes.Internal, "unknown message type: %T", msg) } - msg := &pb.Sync_Response{ - Resource: buf.Bytes(), - } - err := checkMessageSize(msg, rec) - if err != nil { - sc := rec.Schema() - tName, _ := sc.Metadata().GetValue(schema.MetadataTableName) - s.Logger.Warn().Str("table", tName). - Int("bytes", len(msg.String())). - Msg("Row exceeding max bytes ignored") - continue - } - if err := stream.Send(msg); err != nil { + // err := checkMessageSize(msg, rec) + // if err != nil { + // sc := rec.Schema() + // tName, _ := sc.Metadata().GetValue(schema.MetadataTableName) + // s.Logger.Warn().Str("table", tName). + // Int("bytes", len(msg.String())). + // Msg("Row exceeding max bytes ignored") + // continue + // } + if err := stream.Send(pbMsg); err != nil { return status.Errorf(codes.Internal, "failed to send resource: %v", err) } } @@ -142,105 +160,82 @@ func (s *Server) Sync(req *pb.Sync_Request, stream pb.Plugin_SyncServer) error { return syncErr } -func (s *Server) Migrate(ctx context.Context, req *pb.Migrate_Request) (*pb.Migrate_Response, error) { - schemas, err := schema.NewSchemasFromBytes(req.Tables) - if err != nil { - return nil, status.Errorf(codes.InvalidArgument, "failed to create schemas: %v", err) - } - tables, err := schema.NewTablesFromArrowSchemas(schemas) - if err != nil { - return nil, status.Errorf(codes.InvalidArgument, "failed to create tables: %v", err) - } - if req.PkMode == pb.PK_MODE_CQ_ID_ONLY { - setCQIDAsPrimaryKeysForTables(tables) - } - migrateMode := plugin.MigrateModeSafe - switch req.MigrateMode { - case pb.MIGRATE_MODE_SAFE: - migrateMode = plugin.MigrateModeSafe - case pb.MIGRATE_MODE_FORCE: - migrateMode = plugin.MigrateModeForce - } - return &pb.Migrate_Response{}, s.Plugin.Migrate(ctx, tables, migrateMode) -} - func (s *Server) Write(msg pb.Plugin_WriteServer) error { - resources := make(chan arrow.Record) + msgs := make(chan plugin.Message) - r, err := msg.Recv() - if err != nil { - if err == io.EOF { - return msg.SendAndClose(&pb.Write_Response{}) - } - return status.Errorf(codes.Internal, "failed to receive msg: %v", err) - } - - schemas, err := schema.NewSchemasFromBytes(r.Tables) - if err != nil { - return status.Errorf(codes.InvalidArgument, "failed to create schemas: %v", err) - } - tables, err := schema.NewTablesFromArrowSchemas(schemas) - if err != nil { - return status.Errorf(codes.InvalidArgument, "failed to create tables: %v", err) - } - if r.PkMode == pb.PK_MODE_CQ_ID_ONLY { - setCQIDAsPrimaryKeysForTables(tables) - } - sourceName := r.SourceName - syncTime := r.SyncTime.AsTime() - writeMode := plugin.WriteModeOverwrite - switch r.WriteMode { - case pb.WRITE_MODE_WRITE_MODE_APPEND: - writeMode = plugin.WriteModeAppend - case pb.WRITE_MODE_WRITE_MODE_OVERWRITE: - writeMode = plugin.WriteModeOverwrite - case pb.WRITE_MODE_WRITE_MODE_OVERWRITE_DELETE_STALE: - writeMode = plugin.WriteModeOverwriteDeleteStale - } eg, ctx := errgroup.WithContext(msg.Context()) eg.Go(func() error { - return s.Plugin.Write(ctx, sourceName, tables, syncTime, writeMode, resources) + return s.Plugin.Write(ctx, plugin.WriteOptions{}, msgs) }) for { r, err := msg.Recv() if err == io.EOF { - close(resources) + close(msgs) if err := eg.Wait(); err != nil { return status.Errorf(codes.Internal, "write failed: %v", err) } return msg.SendAndClose(&pb.Write_Response{}) } if err != nil { - close(resources) + close(msgs) if wgErr := eg.Wait(); wgErr != nil { return status.Errorf(codes.Internal, "failed to receive msg: %v and write failed: %v", err, wgErr) } return status.Errorf(codes.Internal, "failed to receive msg: %v", err) } - rdr, err := ipc.NewReader(bytes.NewReader(r.Resource)) - if err != nil { - close(resources) - if wgErr := eg.Wait(); wgErr != nil { - return status.Errorf(codes.InvalidArgument, "failed to create reader: %v and write failed: %v", err, wgErr) + var pluginMessage plugin.Message + var pbMsgConvertErr error + switch pbMsg := r.Message.(type) { + case *pb.Write_Request_CreateTable: + table, err := schema.NewTableFromBytes(pbMsg.CreateTable.Table) + if err != nil { + pbMsgConvertErr = status.Errorf(codes.InvalidArgument, "failed to create table: %v", err) + break + } + pluginMessage = &plugin.MessageCreateTable{ + Table: table, + MigrateForce: pbMsg.CreateTable.MigrateForce, + } + case *pb.Write_Request_Insert: + record, err := schema.NewRecordFromBytes(pbMsg.Insert.Record) + if err != nil { + pbMsgConvertErr = status.Errorf(codes.InvalidArgument, "failed to create record: %v", err) + break + } + pluginMessage = &plugin.MessageInsert{ + Record: record, + Upsert: pbMsg.Insert.Upsert, + } + case *pb.Write_Request_Delete: + table, err := schema.NewTableFromBytes(pbMsg.Delete.Table) + if err != nil { + pbMsgConvertErr = status.Errorf(codes.InvalidArgument, "failed to create record: %v", err) + break + } + pluginMessage = &plugin.MessageDeleteStale{ + Table: table, + SourceName: pbMsg.Delete.SourceName, + SyncTime: pbMsg.Delete.SyncTime.AsTime(), } - return status.Errorf(codes.InvalidArgument, "failed to create reader: %v", err) } - for rdr.Next() { - rec := rdr.Record() - rec.Retain() - select { - case resources <- rec: - case <-ctx.Done(): - close(resources) - if err := eg.Wait(); err != nil { - return status.Errorf(codes.Internal, "Context done: %v and failed to wait for plugin: %v", ctx.Err(), err) - } - return status.Errorf(codes.Internal, "Context done: %v", ctx.Err()) + + if pbMsgConvertErr != nil { + close(msgs) + if wgErr := eg.Wait(); wgErr != nil { + return status.Errorf(codes.Internal, "failed to convert message: %v and write failed: %v", pbMsgConvertErr, wgErr) } + return pbMsgConvertErr } - if err := rdr.Err(); err != nil { - return status.Errorf(codes.InvalidArgument, "failed to read resource: %v", err) + + select { + case msgs <- pluginMessage: + case <-ctx.Done(): + close(msgs) + if err := eg.Wait(); err != nil { + return status.Errorf(codes.Internal, "Context done: %v and failed to wait for plugin: %v", ctx.Err(), err) + } + return status.Errorf(codes.Internal, "Context done: %v", ctx.Err()) } } } @@ -263,15 +258,6 @@ func checkMessageSize(msg proto.Message, record arrow.Record) error { return nil } -func setCQIDAsPrimaryKeysForTables(tables schema.Tables) { - for _, table := range tables { - for i, col := range table.Columns { - table.Columns[i].PrimaryKey = col.Name == schema.CqIDColumn.Name - } - setCQIDAsPrimaryKeysForTables(table.Relations) - } -} - func (s *Server) Close(ctx context.Context, _ *pb.Close_Request) (*pb.Close_Response, error) { return &pb.Close_Response{}, s.Plugin.Close(ctx) } diff --git a/internal/servers/plugin/v3/state.go b/internal/servers/plugin/v3/state.go index be152297b7..81fd753a5c 100644 --- a/internal/servers/plugin/v3/state.go +++ b/internal/servers/plugin/v3/state.go @@ -23,14 +23,13 @@ const keyColumn = "key" const valueColumn = "value" type ClientV3 struct { - client pbPlugin.PluginClient - encodedTables [][]byte - mem map[string]string - keys []string - values []string + client pbPlugin.PluginClient + mem map[string]string + keys []string + values []string } -func newStateClient(ctx context.Context, conn *grpc.ClientConn, spec pbPlugin.StateBackendSpec) (state.Client, error) { +func newStateClient(ctx context.Context, conn *grpc.ClientConn, spec *pbPlugin.StateBackendSpec) (state.Client, error) { discoveryClient := pbDiscovery.NewDiscoveryClient(conn) versions, err := discoveryClient.GetVersions(ctx, &pbDiscovery.GetVersions_Request{}) if err != nil { @@ -61,8 +60,7 @@ func newStateClient(ctx context.Context, conn *grpc.ClientConn, spec pbPlugin.St }, }, } - tables := schema.Tables{table} - c.encodedTables, err = tables.ToArrowSchemas().Encode() + tableBytes, err := table.ToArrowSchemaBytes() if err != nil { return nil, err } @@ -73,9 +71,17 @@ func newStateClient(ctx context.Context, conn *grpc.ClientConn, spec pbPlugin.St return nil, err } - if _, err := c.client.Migrate(ctx, &pbPlugin.Migrate_Request{ - Tables: c.encodedTables, - MigrateMode: pbPlugin.MIGRATE_MODE_SAFE, + writeClient, err := c.client.Write(ctx) + if err != nil { + return nil, err + } + + if err := writeClient.Send(&pbPlugin.Write_Request{ + Message: &pbPlugin.Write_Request_CreateTable{ + CreateTable: &pbPlugin.MessageCreateTable{ + Table: tableBytes, + }, + }, }); err != nil { return nil, err } @@ -94,7 +100,11 @@ func newStateClient(ctx context.Context, conn *grpc.ClientConn, spec pbPlugin.St } return nil, err } - rdr, err := ipc.NewReader(bytes.NewReader(res.Resource)) + insertMessage := res.GetInsert() + if insertMessage == nil { + return nil, fmt.Errorf("unexpected message type %T", res) + } + rdr, err := ipc.NewReader(bytes.NewReader(insertMessage.Record)) if err != nil { return nil, err } @@ -141,12 +151,12 @@ func (c *ClientV3) flush(ctx context.Context) error { return err } if err := writeClient.Send(&pbPlugin.Write_Request{ - WriteMode: pbPlugin.WRITE_MODE_WRITE_MODE_OVERWRITE, - }); err != nil { - return err - } - if err := writeClient.Send(&pbPlugin.Write_Request{ - Resource: buf.Bytes(), + Message: &pbPlugin.Write_Request_Insert{ + Insert: &pbPlugin.MessageInsert{ + Record: buf.Bytes(), + Upsert: true, + }, + }, }); err != nil { return err } diff --git a/plugin/messages.go b/plugin/messages.go index fa975cc97c..43e3eedacb 100644 --- a/plugin/messages.go +++ b/plugin/messages.go @@ -1,83 +1,42 @@ package plugin import ( + "time" + "github.com/apache/arrow/go/v13/arrow" "github.com/cloudquery/plugin-sdk/v4/schema" ) -type MessageType int - -const ( - // Create table - MessageTypeCreate MessageType = iota - // Insert record - MessageTypeInsert - // Insert or update record - MessageTypeUpsert - // Delete rows - MessageTypeDelete -) - type MessageCreateTable struct { - Table *schema.Table - Force bool -} - -func (*MessageCreateTable) Type() MessageType { - return MessageTypeCreate + Table *schema.Table + MigrateForce bool } type MessageInsert struct { - Record arrow.Record - Columns []string - Upsert bool -} - -func (*MessageInsert) Type() MessageType { - return MessageTypeInsert -} - -type Operator int - -const ( - OperatorEqual Operator = iota - OperatorNotEqual - OperatorGreaterThan - OperatorGreaterThanOrEqual - OperatorLessThan - OperatorLessThanOrEqual -) - -type WhereClause struct { - Column string - Operator Operator - Value string -} - -type MessageDelete struct { Record arrow.Record - // currently delete only supports and where clause as we don't support - // full AST parsing - WhereClauses []WhereClause + Upsert bool } -func (*MessageDelete) Type() MessageType { - return MessageTypeDelete +// MessageDeleteStale is a pretty specific message which requires the destination to be aware of a CLI use-case +// thus it might be deprecated in the future +// in favour of MessageDelete or MessageRawQuery +// The message indeciates that the destination needs to run something like "DELETE FROM table WHERE _cq_source_name=$1 and sync_time < $2" +type MessageDeleteStale struct { + Table *schema.Table + SourceName string + SyncTime time.Time } -type Message interface { - Type() MessageType -} +type Message any type Messages []Message -func (m Messages) InsertItems() int64 { +func (messages Messages) InsertItems() int64 { items := int64(0) - for _, msg := range m { - switch msg.Type() { - case MessageTypeInsert: - msgInsert := msg.(*MessageInsert) - items += msgInsert.Record.NumRows() + for _, msg := range messages { + switch m := msg.(type) { + case *MessageInsert: + items += m.Record.NumRows() } } return items diff --git a/plugin/options.go b/plugin/options.go index 09a771d0b6..966f692e60 100644 --- a/plugin/options.go +++ b/plugin/options.go @@ -15,20 +15,4 @@ func (m MigrateMode) String() string { return migrateModeStrings[m] } -type WriteMode int - -const ( - WriteModeOverwriteDeleteStale WriteMode = iota - WriteModeOverwrite - WriteModeAppend -) - -var ( - writeModeStrings = []string{"overwrite-delete-stale", "overwrite", "append"} -) - -func (m WriteMode) String() string { - return writeModeStrings[m] -} - type Option func(*Plugin) diff --git a/plugin/plugin.go b/plugin/plugin.go index 9900d16e26..b583e86811 100644 --- a/plugin/plugin.go +++ b/plugin/plugin.go @@ -104,6 +104,9 @@ func (p *Plugin) SetLogger(logger zerolog.Logger) { } func (p *Plugin) Tables(ctx context.Context) (schema.Tables, error) { + if p.client == nil { + return nil, fmt.Errorf("plugin not initialized") + } tables, err := p.client.Tables(ctx) if err != nil { return nil, fmt.Errorf("failed to get tables: %w", err) @@ -132,5 +135,8 @@ func (p *Plugin) Close(ctx context.Context) error { return fmt.Errorf("plugin already in use") } defer p.mu.Unlock() + if p.client == nil { + return nil + } return p.client.Close(ctx) } diff --git a/plugin/plugin_reader.go b/plugin/plugin_reader.go index 89963d7eb4..4f9d51e66a 100644 --- a/plugin/plugin_reader.go +++ b/plugin/plugin_reader.go @@ -4,6 +4,7 @@ import ( "context" "fmt" + "github.com/cloudquery/plugin-sdk/v4/internal/glob" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/cloudquery/plugin-sdk/v4/state" "github.com/rs/zerolog" @@ -23,6 +24,20 @@ type ReadOnlyClient interface { Close(ctx context.Context) error } +func IsTable(name string, includeTablesPattern []string, skipTablesPattern []string) bool { + for _, pattern := range skipTablesPattern { + if glob.Glob(pattern, name) { + return false + } + } + for _, pattern := range includeTablesPattern { + if glob.Glob(pattern, name) { + return true + } + } + return false +} + type NewReadOnlyClientFunc func(context.Context, zerolog.Logger, any) (ReadOnlyClient, error) // NewReadOnlyPlugin returns a new CloudQuery Plugin with the given name, version and implementation. @@ -44,7 +59,7 @@ func NewReadOnlyPlugin(name string, version string, newClient NewReadOnlyClientF return NewPlugin(name, version, newClientWrapper, options...) } -func (p *Plugin) syncAll(ctx context.Context, options SyncOptions) (Messages, error) { +func (p *Plugin) SyncAll(ctx context.Context, options SyncOptions) (Messages, error) { var err error ch := make(chan Message) go func() { diff --git a/plugin/plugin_test.go b/plugin/plugin_test.go index d0016a113b..e33670b6d3 100644 --- a/plugin/plugin_test.go +++ b/plugin/plugin_test.go @@ -3,71 +3,72 @@ package plugin import ( "context" "testing" - "time" - "github.com/apache/arrow/go/v13/arrow/array" "github.com/cloudquery/plugin-sdk/v4/schema" + "github.com/rs/zerolog" ) -func TestPluginUnmanagedSync(t *testing.T) { - ctx := context.Background() - p := NewPlugin("test", "v0.0.0", NewMemDBClient) - testTable := schema.TestTable("test_table", schema.TestSourceOptions{}) - syncTime := time.Now().UTC() - sourceName := "test" - testRecords := schema.GenTestData(testTable, schema.GenTestDataOptions{ - SourceName: sourceName, - SyncTime: syncTime, - MaxRows: 1, - }) - if err := p.Init(ctx, nil); err != nil { - t.Fatal(err) - } +type testPluginSpec struct { +} - if err := p.Migrate(ctx, schema.Tables{testTable}, MigrateModeSafe); err != nil { - t.Fatal(err) +type testPluginClient struct { + messages []Message +} + +func newTestPluginClient(context.Context, zerolog.Logger, any) (Client, error) { + return &testPluginClient{}, nil +} + +func (c *testPluginClient) Tables(ctx context.Context) (schema.Tables, error) { + return schema.Tables{}, nil +} +func (c *testPluginClient) Sync(ctx context.Context, options SyncOptions, res chan<- Message) error { + for _, msg := range c.messages { + res <- msg + } + return nil +} +func (c *testPluginClient) Write(ctx context.Context, options WriteOptions, res <-chan Message) error { + for msg := range res { + c.messages = append(c.messages, msg) } - if err := p.writeAll(ctx, sourceName, syncTime, WriteModeOverwrite, testRecords); err != nil { + return nil +} +func (c *testPluginClient) Close(context.Context) error { + return nil +} + +func TestPluginSuccess(t *testing.T) { + ctx := context.Background() + p := NewPlugin("test", "v1.0.0", newTestPluginClient) + if err := p.Init(ctx, &testPluginSpec{}); err != nil { t.Fatal(err) } - gotRecords, err := p.syncAll(ctx, SyncOptions{ - Tables: []string{testTable.Name}, - }) + tables, err := p.Tables(ctx) if err != nil { t.Fatal(err) } - if len(gotRecords) != len(testRecords) { - t.Fatalf("got %d records, want %d", len(gotRecords), len(testRecords)) - } - if !array.RecordEqual(testRecords[0], gotRecords[0]) { - t.Fatal("records are not equal") + if len(tables) != 0 { + t.Fatal("expected 0 tables") } - records, err := p.syncAll(ctx, SyncOptions{ - Tables: []string{testTable.Name}, - }) - if err != nil { + if err := p.WriteAll(ctx, WriteOptions{}, nil); err != nil { t.Fatal(err) } - if len(records) != 1 { - t.Fatalf("got %d resources, want 1", len(records)) + if err := p.WriteAll(ctx, WriteOptions{}, []Message{ + MessageCreateTable{}, + }); err != nil { + t.Fatal(err) } - - if !array.RecordEqual(testRecords[0], records[0]) { - t.Fatal("records are not equal") + if len(p.client.(*testPluginClient).messages) != 1 { + t.Fatal("expected 1 message") } - newSyncTime := time.Now().UTC() - if err := p.DeleteStale(ctx, schema.Tables{testTable}, "test", newSyncTime); err != nil { - t.Fatal(err) - } - records, err = p.syncAll(ctx, SyncOptions{ - Tables: []string{testTable.Name}, - }) + messages, err := p.SyncAll(ctx, SyncOptions{}) if err != nil { t.Fatal(err) } - if len(records) != 0 { - t.Fatalf("got %d resources, want 0", len(records)) + if len(messages) != 1 { + t.Fatal("expected 1 message") } if err := p.Close(ctx); err != nil { diff --git a/plugin/plugin_writer.go b/plugin/plugin_writer.go index 009ac23ad7..5b5d64912a 100644 --- a/plugin/plugin_writer.go +++ b/plugin/plugin_writer.go @@ -10,11 +10,11 @@ type WriteOptions struct { // this function is currently used mostly for testing so it's not a public api func (p *Plugin) writeOne(ctx context.Context, options WriteOptions, resource Message) error { resources := []Message{resource} - return p.writeAll(ctx, options, resources) + return p.WriteAll(ctx, options, resources) } // this function is currently used mostly for testing so it's not a public api -func (p *Plugin) writeAll(ctx context.Context, options WriteOptions, resources []Message) error { +func (p *Plugin) WriteAll(ctx context.Context, options WriteOptions, resources []Message) error { ch := make(chan Message, len(resources)) for _, resource := range resources { ch <- resource diff --git a/plugin/testing_sync.go b/plugin/testing_sync.go.backup similarity index 100% rename from plugin/testing_sync.go rename to plugin/testing_sync.go.backup diff --git a/plugin/testing_upsert.go b/plugin/testing_upsert.go index f6b16f3ae3..55a1c0e82d 100644 --- a/plugin/testing_upsert.go +++ b/plugin/testing_upsert.go @@ -11,7 +11,7 @@ import ( "github.com/cloudquery/plugin-sdk/v4/schema" ) -func (s *PluginTestSuite) testUpsert(ctx context.Context) error { +func (s *WriterTestSuite) testUpsert(ctx context.Context) error { tableName := fmt.Sprintf("cq_test_upsert_%d", time.Now().Unix()) table := &schema.Table{ Name: tableName, @@ -27,15 +27,16 @@ func (s *PluginTestSuite) testUpsert(ctx context.Context) error { bldr := array.NewRecordBuilder(memory.DefaultAllocator, table.ToArrowSchema()) bldr.Field(0).(*array.StringBuilder).Append("foo") + record := bldr.NewRecord() if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageInsert{ - Record: bldr.NewRecord(), + Record: record, Upsert: true, }); err != nil { return fmt.Errorf("failed to insert record: %w", err) } - messages, err := s.plugin.syncAll(ctx, SyncOptions{ + messages, err := s.plugin.SyncAll(ctx, SyncOptions{ Tables: []string{tableName}, }) if err != nil { @@ -47,13 +48,13 @@ func (s *PluginTestSuite) testUpsert(ctx context.Context) error { } if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageInsert{ - Record: bldr.NewRecord(), + Record: record, Upsert: true, }); err != nil { return fmt.Errorf("failed to insert record: %w", err) } - messages, err = s.plugin.syncAll(ctx, SyncOptions{ + messages, err = s.plugin.SyncAll(ctx, SyncOptions{ Tables: []string{tableName}, }) if err != nil { diff --git a/plugin/testing_write.go b/plugin/testing_write.go index 5a358376af..03046d0c87 100644 --- a/plugin/testing_write.go +++ b/plugin/testing_write.go @@ -2,17 +2,13 @@ package plugin import ( "context" - "sort" - "strings" "testing" "github.com/apache/arrow/go/v13/arrow" - "github.com/apache/arrow/go/v13/arrow/array" "github.com/cloudquery/plugin-sdk/v4/schema" - "github.com/cloudquery/plugin-sdk/v4/types" ) -type PluginTestSuite struct { +type WriterTestSuite struct { tests PluginTestSuiteTests plugin *Plugin @@ -45,8 +41,8 @@ type PluginTestSuiteTests struct { // Usually when a destination is not supporting primary keys SkipUpsert bool - // SkipDelete skips testing MessageDelete events. - SkipDelete bool + // SkipDeleteStale skips testing MessageDelete events. + SkipDeleteStale bool // SkipAppend skips testing MessageInsert and Upsert=false. SkipInsert bool @@ -61,27 +57,27 @@ type PluginTestSuiteTests struct { type NewPluginFunc func() *Plugin -func WithTestSourceAllowNull(allowNull func(arrow.DataType) bool) func(o *PluginTestSuite) { - return func(o *PluginTestSuite) { +func WithTestSourceAllowNull(allowNull func(arrow.DataType) bool) func(o *WriterTestSuite) { + return func(o *WriterTestSuite) { o.allowNull = allowNull } } -func WithTestIgnoreNullsInLists() func(o *PluginTestSuite) { - return func(o *PluginTestSuite) { +func WithTestIgnoreNullsInLists() func(o *WriterTestSuite) { + return func(o *WriterTestSuite) { o.ignoreNullsInLists = true } } -func WithTestDataOptions(opts schema.TestSourceOptions) func(o *PluginTestSuite) { - return func(o *PluginTestSuite) { +func WithTestDataOptions(opts schema.TestSourceOptions) func(o *WriterTestSuite) { + return func(o *WriterTestSuite) { o.genDatOptions = opts } } -func PluginTestSuiteRunner(t *testing.T, p *Plugin, tests PluginTestSuiteTests, opts ...func(o *PluginTestSuite)) { +func TestWriterSuiteRunner(t *testing.T, p *Plugin, tests PluginTestSuiteTests, opts ...func(o *WriterTestSuite)) { t.Helper() - suite := &PluginTestSuite{ + suite := &WriterTestSuite{ tests: tests, plugin: p, } @@ -112,12 +108,12 @@ func PluginTestSuiteRunner(t *testing.T, p *Plugin, tests PluginTestSuiteTests, } }) - t.Run("TestDelete", func(t *testing.T) { + t.Run("TestDeleteStale", func(t *testing.T) { t.Helper() - if suite.tests.SkipDelete { + if suite.tests.SkipDeleteStale { t.Skip("skipping " + t.Name()) } - if err := suite.testDelete(ctx); err != nil { + if err := suite.testDeleteStale(ctx); err != nil { t.Fatal(err) } }) @@ -127,25 +123,7 @@ func PluginTestSuiteRunner(t *testing.T, p *Plugin, tests PluginTestSuiteTests, if suite.tests.SkipMigrate { t.Skip("skipping " + t.Name()) } - migrateMode := MigrateModeSafe - writeMode := WriteModeOverwrite - suite.destinationPluginTestMigrate(ctx, t, p, migrateMode, writeMode, tests.MigrateStrategyOverwrite, opts) - }) - -} - -func sortRecordsBySyncTime(table *schema.Table, records []arrow.Record) { - syncTimeIndex := table.Columns.Index(schema.CqSyncTimeColumn.Name) - cqIDIndex := table.Columns.Index(schema.CqIDColumn.Name) - sort.Slice(records, func(i, j int) bool { - // sort by sync time, then UUID - first := records[i].Column(syncTimeIndex).(*array.Timestamp).Value(0).ToTime(arrow.Millisecond) - second := records[j].Column(syncTimeIndex).(*array.Timestamp).Value(0).ToTime(arrow.Millisecond) - if first.Equal(second) { - firstUUID := records[i].Column(cqIDIndex).(*types.UUIDArray).Value(0).String() - secondUUID := records[j].Column(cqIDIndex).(*types.UUIDArray).Value(0).String() - return strings.Compare(firstUUID, secondUUID) < 0 - } - return first.Before(second) + suite.testMigrate(ctx, t, MigrateModeSafe) + suite.testMigrate(ctx, t, MigrateModeForce) }) } diff --git a/plugin/testing_write_delete.go b/plugin/testing_write_delete.go index bb4c44c2d8..ad569e5baf 100644 --- a/plugin/testing_write_delete.go +++ b/plugin/testing_write_delete.go @@ -5,21 +5,20 @@ import ( "fmt" "time" - "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/array" "github.com/apache/arrow/go/v13/arrow/memory" "github.com/cloudquery/plugin-sdk/v4/schema" // "github.com/cloudquery/plugin-sdk/v4/types" ) -func (s *PluginTestSuite) testDelete(ctx context.Context) error { +func (s *WriterTestSuite) testDeleteStale(ctx context.Context) error { tableName := fmt.Sprintf("cq_delete_%d", time.Now().Unix()) syncTime := time.Now().UTC().Round(1 * time.Second) table := &schema.Table{ Name: tableName, Columns: []schema.Column{ - {Name: "name", Type: arrow.BinaryTypes.String}, - {Name: "sync_time", Type: arrow.FixedWidthTypes.Timestamp_us}, + schema.CqSourceNameColumn, + schema.CqSyncTimeColumn, }, } if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageCreateTable{ @@ -31,16 +30,15 @@ func (s *PluginTestSuite) testDelete(ctx context.Context) error { bldr := array.NewRecordBuilder(memory.DefaultAllocator, table.ToArrowSchema()) bldr.Field(0).(*array.StringBuilder).Append("test") bldr.Field(1).(*array.TimestampBuilder).AppendTime(syncTime) - bldr.Field(0).(*array.StringBuilder).Append("test") - bldr.Field(1).(*array.TimestampBuilder).AppendTime(syncTime.Add(time.Second)) + record := bldr.NewRecord() if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageInsert{ - Record: bldr.NewRecord(), + Record: record, }); err != nil { return fmt.Errorf("failed to insert record: %w", err) } - messages, err := s.plugin.syncAll(ctx, SyncOptions{ + messages, err := s.plugin.SyncAll(ctx, SyncOptions{ Tables: []string{tableName}, }) if err != nil { @@ -48,27 +46,23 @@ func (s *PluginTestSuite) testDelete(ctx context.Context) error { } totalItems := messages.InsertItems() - if totalItems != 2 { - return fmt.Errorf("expected 2 items, got %d", totalItems) + if totalItems != 1 { + return fmt.Errorf("expected 1 items, got %d", totalItems) } bldr = array.NewRecordBuilder(memory.DefaultAllocator, table.ToArrowSchema()) bldr.Field(0).(*array.StringBuilder).Append("test") bldr.Field(1).(*array.TimestampBuilder).AppendTime(syncTime.Add(time.Second)) - if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageDelete{ - Record: bldr.NewRecord(), - WhereClauses: []WhereClause{ - { - Column: "name", - Operator: OperatorLessThan, - }, - }, + if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageDeleteStale{ + Table: table, + SourceName: "test", + SyncTime: syncTime, }); err != nil { return fmt.Errorf("failed to delete stale records: %w", err) } - messages, err = s.plugin.syncAll(ctx, SyncOptions{ + messages, err = s.plugin.SyncAll(ctx, SyncOptions{ Tables: []string{tableName}, }) if err != nil { diff --git a/plugin/testing_write_insert.go b/plugin/testing_write_insert.go index 4bc7f66c86..36004ac173 100644 --- a/plugin/testing_write_insert.go +++ b/plugin/testing_write_insert.go @@ -11,7 +11,7 @@ import ( "github.com/cloudquery/plugin-sdk/v4/schema" ) -func (s *PluginTestSuite) testInsert(ctx context.Context) error { +func (s *WriterTestSuite) testInsert(ctx context.Context) error { tableName := fmt.Sprintf("cq_test_insert_%d", time.Now().Unix()) table := &schema.Table{ Name: tableName, @@ -27,15 +27,16 @@ func (s *PluginTestSuite) testInsert(ctx context.Context) error { bldr := array.NewRecordBuilder(memory.DefaultAllocator, table.ToArrowSchema()) bldr.Field(0).(*array.StringBuilder).Append("foo") + record := bldr.NewRecord() if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageInsert{ - Record: bldr.NewRecord(), - Upsert: true, + Record: record, + Upsert: false, }); err != nil { return fmt.Errorf("failed to insert record: %w", err) } - messages, err := s.plugin.syncAll(ctx, SyncOptions{ + messages, err := s.plugin.SyncAll(ctx, SyncOptions{ Tables: []string{tableName}, }) if err != nil { @@ -47,12 +48,12 @@ func (s *PluginTestSuite) testInsert(ctx context.Context) error { } if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageInsert{ - Record: bldr.NewRecord(), + Record: record, }); err != nil { return fmt.Errorf("failed to insert record: %w", err) } - messages, err = s.plugin.syncAll(ctx, SyncOptions{ + messages, err = s.plugin.SyncAll(ctx, SyncOptions{ Tables: []string{tableName}, }) if err != nil { diff --git a/plugin/testing_write_migrate.go b/plugin/testing_write_migrate.go index 78468a817e..13c1fede30 100644 --- a/plugin/testing_write_migrate.go +++ b/plugin/testing_write_migrate.go @@ -17,7 +17,7 @@ func tableUUIDSuffix() string { return strings.ReplaceAll(uuid.NewString(), "-", "_") } -func (s *PluginTestSuite) migrate(ctx context.Context, target *schema.Table, source *schema.Table, strategy MigrateMode, mode MigrateMode) error { +func (s *WriterTestSuite) migrate(ctx context.Context, target *schema.Table, source *schema.Table, strategy MigrateMode, mode MigrateMode) error { if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageCreateTable{ Table: source, }); err != nil { @@ -41,7 +41,7 @@ func (s *PluginTestSuite) migrate(ctx context.Context, target *schema.Table, sou return fmt.Errorf("failed to insert record: %w", err) } - messages, err := s.plugin.syncAll(ctx, SyncOptions{ + messages, err := s.plugin.SyncAll(ctx, SyncOptions{ Tables: []string{source.Name}, }) if err != nil { @@ -53,8 +53,8 @@ func (s *PluginTestSuite) migrate(ctx context.Context, target *schema.Table, sou } if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageCreateTable{ - Table: target, - Force: strategy == MigrateModeForce, + Table: target, + MigrateForce: strategy == MigrateModeForce, }); err != nil { return fmt.Errorf("failed to create table: %w", err) } @@ -65,7 +65,7 @@ func (s *PluginTestSuite) migrate(ctx context.Context, target *schema.Table, sou return fmt.Errorf("failed to insert record: %w", err) } - messages, err = s.plugin.syncAll(ctx, SyncOptions{ + messages, err = s.plugin.SyncAll(ctx, SyncOptions{ Tables: []string{source.Name}, }) if err != nil { @@ -86,7 +86,7 @@ func (s *PluginTestSuite) migrate(ctx context.Context, target *schema.Table, sou return nil } -func (s *PluginTestSuite) testMigrate( +func (s *WriterTestSuite) testMigrate( ctx context.Context, t *testing.T, mode MigrateMode, diff --git a/plugin/testing_write_upsert.go b/plugin/testing_write_upsert.go deleted file mode 100644 index 4ee1ba9db7..0000000000 --- a/plugin/testing_write_upsert.go +++ /dev/null @@ -1,69 +0,0 @@ -package plugin - -import ( - "context" - "fmt" - "time" - - "github.com/apache/arrow/go/v13/arrow" - "github.com/apache/arrow/go/v13/arrow/array" - "github.com/apache/arrow/go/v13/arrow/memory" - "github.com/cloudquery/plugin-sdk/v4/schema" -) - -func (s *PluginTestSuite) destinationPluginTestWriteOverwrite(ctx context.Context, p *Plugin) error { - tableName := fmt.Sprintf("cq_test_upsert_%d", time.Now().Unix()) - table := &schema.Table{ - Name: tableName, - Columns: []schema.Column{ - {Name: "name", Type: arrow.BinaryTypes.String, PrimaryKey: true}, - }, - } - if err := p.writeOne(ctx, WriteOptions{}, &MessageCreateTable{ - Table: table, - }); err != nil { - return fmt.Errorf("failed to create table: %w", err) - } - - bldr := array.NewRecordBuilder(memory.DefaultAllocator, table.ToArrowSchema()) - bldr.Field(0).(*array.StringBuilder).Append("foo") - - if err := p.writeOne(ctx, WriteOptions{}, &MessageInsert{ - Record: bldr.NewRecord(), - Upsert: true, - }); err != nil { - return fmt.Errorf("failed to insert record: %w", err) - } - - messages, err := p.syncAll(ctx, SyncOptions{ - Tables: []string{tableName}, - }) - if err != nil { - return fmt.Errorf("failed to sync: %w", err) - } - totalItems := messages.InsertItems() - if totalItems != 1 { - return fmt.Errorf("expected 1 item, got %d", totalItems) - } - - if err := p.writeOne(ctx, WriteOptions{}, &MessageInsert{ - Record: bldr.NewRecord(), - Upsert: true, - }); err != nil { - return fmt.Errorf("failed to insert record: %w", err) - } - - messages, err = p.syncAll(ctx, SyncOptions{ - Tables: []string{tableName}, - }) - if err != nil { - return fmt.Errorf("failed to sync: %w", err) - } - - totalItems = messages.InsertItems() - if totalItems != 1 { - return fmt.Errorf("expected 1 item, got %d", totalItems) - } - - return nil -} diff --git a/scheduler/scheduler.go b/scheduler/scheduler.go index 08d8c86166..66f56845d7 100644 --- a/scheduler/scheduler.go +++ b/scheduler/scheduler.go @@ -27,33 +27,33 @@ const ( defaultConcurrency = 200000 ) -type SchedulerStrategy int +type Strategy int const ( - SchedulerDFS SchedulerStrategy = iota - SchedulerRoundRobin + StrategyDFS Strategy = iota + StrategyRoundRobin ) -var AllSchedulers = Schedulers{SchedulerDFS, SchedulerRoundRobin} +var AllSchedulers = Strategies{StrategyDFS, StrategyRoundRobin} var AllSchedulerNames = [...]string{ - SchedulerDFS: "dfs", - SchedulerRoundRobin: "round-robin", + StrategyDFS: "dfs", + StrategyRoundRobin: "round-robin", } -type Schedulers []SchedulerStrategy +type Strategies []Strategy -func (s Schedulers) String() string { +func (s Strategies) String() string { var buffer bytes.Buffer - for i, scheduler := range s { + for i, strategy := range s { if i > 0 { buffer.WriteString(", ") } - buffer.WriteString(scheduler.String()) + buffer.WriteString(strategy.String()) } return buffer.String() } -func (s SchedulerStrategy) String() string { +func (s Strategy) String() string { return AllSchedulerNames[s] } @@ -77,7 +77,7 @@ func WithConcurrency(concurrency uint64) Option { } } -func WithSchedulerStrategy(strategy SchedulerStrategy) Option { +func WithSchedulerStrategy(strategy Strategy) Option { return func(s *Scheduler) { s.strategy = strategy } @@ -87,7 +87,7 @@ type Scheduler struct { tables schema.Tables client schema.ClientMeta caser *caser.Caser - strategy SchedulerStrategy + strategy Strategy // status sync metrics metrics *Metrics maxDepth uint64 @@ -124,9 +124,9 @@ func (s *Scheduler) Sync(ctx context.Context, res chan<- arrow.Record) error { go func() { defer close(resources) switch s.strategy { - case SchedulerDFS: + case StrategyDFS: s.syncDfs(ctx, resources) - case SchedulerRoundRobin: + case StrategyRoundRobin: s.syncRoundRobin(ctx, resources) default: panic(fmt.Errorf("unknown scheduler %s", s.strategy)) @@ -142,24 +142,24 @@ func (s *Scheduler) Sync(ctx context.Context, res chan<- arrow.Record) error { return nil } -func (p *Scheduler) logTablesMetrics(tables schema.Tables, client schema.ClientMeta) { +func (s *Scheduler) logTablesMetrics(tables schema.Tables, client schema.ClientMeta) { clientName := client.ID() for _, table := range tables { - metrics := p.metrics.TableClient[table.Name][clientName] - p.logger.Info().Str("table", table.Name).Str("client", clientName).Uint64("resources", metrics.Resources).Uint64("errors", metrics.Errors).Msg("table sync finished") - p.logTablesMetrics(table.Relations, client) + metrics := s.metrics.TableClient[table.Name][clientName] + s.logger.Info().Str("table", table.Name).Str("client", clientName).Uint64("resources", metrics.Resources).Uint64("errors", metrics.Errors).Msg("table sync finished") + s.logTablesMetrics(table.Relations, client) } } -func (p *Scheduler) resolveResource(ctx context.Context, table *schema.Table, client schema.ClientMeta, parent *schema.Resource, item any) *schema.Resource { +func (s *Scheduler) resolveResource(ctx context.Context, table *schema.Table, client schema.ClientMeta, parent *schema.Resource, item any) *schema.Resource { var validationErr *schema.ValidationError ctx, cancel := context.WithTimeout(ctx, 10*time.Minute) defer cancel() resource := schema.NewResourceData(table, parent, item) objectStartTime := time.Now() clientID := client.ID() - tableMetrics := p.metrics.TableClient[table.Name][clientID] - logger := p.logger.With().Str("table", table.Name).Str("client", clientID).Logger() + tableMetrics := s.metrics.TableClient[table.Name][clientID] + logger := s.logger.With().Str("table", table.Name).Str("client", clientID).Logger() defer func() { if err := recover(); err != nil { stack := fmt.Sprintf("%s\n%s", err, string(debug.Stack())) @@ -186,7 +186,7 @@ func (p *Scheduler) resolveResource(ctx context.Context, table *schema.Table, cl } for _, c := range table.Columns { - p.resolveColumn(ctx, logger, tableMetrics, client, resource, c) + s.resolveColumn(ctx, logger, tableMetrics, client, resource, c) } if table.PostResourceResolver != nil { @@ -205,7 +205,7 @@ func (p *Scheduler) resolveResource(ctx context.Context, table *schema.Table, cl return resource } -func (p *Scheduler) resolveColumn(ctx context.Context, logger zerolog.Logger, tableMetrics *TableClientMetrics, client schema.ClientMeta, resource *schema.Resource, c schema.Column) { +func (s *Scheduler) resolveColumn(ctx context.Context, logger zerolog.Logger, tableMetrics *TableClientMetrics, client schema.ClientMeta, resource *schema.Resource, c schema.Column) { var validationErr *schema.ValidationError columnStartTime := time.Now() defer func() { @@ -235,7 +235,7 @@ func (p *Scheduler) resolveColumn(ctx context.Context, logger zerolog.Logger, ta } } else { // base use case: try to get column with CamelCase name - v := funk.Get(resource.GetItem(), p.caser.ToPascal(c.Name), funk.WithAllowZero()) + v := funk.Get(resource.GetItem(), s.caser.ToPascal(c.Name), funk.WithAllowZero()) if v != nil { err := resource.Set(c.Name, v) if err != nil { diff --git a/scheduler/scheduler_dfs.go b/scheduler/scheduler_dfs.go index f0d465684f..86f2874ec6 100644 --- a/scheduler/scheduler_dfs.go +++ b/scheduler/scheduler_dfs.go @@ -17,7 +17,7 @@ import ( func (s *Scheduler) syncDfs(ctx context.Context, resolvedResources chan<- *schema.Resource) { // This is very similar to the concurrent web crawler problem with some minor changes. // We are using DFS to make sure memory usage is capped at O(h) where h is the height of the tree. - tableConcurrency := max(uint64(s.concurrency/minResourceConcurrency), minTableConcurrency) + tableConcurrency := max(s.concurrency/minResourceConcurrency, minTableConcurrency) resourceConcurrency := tableConcurrency * minResourceConcurrency s.tableSems = make([]*semaphore.Weighted, s.maxDepth) diff --git a/scheduler/scheduler_round_robin.go b/scheduler/scheduler_round_robin.go index 43bd337862..f800caebc6 100644 --- a/scheduler/scheduler_round_robin.go +++ b/scheduler/scheduler_round_robin.go @@ -14,7 +14,7 @@ type tableClient struct { } func (s *Scheduler) syncRoundRobin(ctx context.Context, resolvedResources chan<- *schema.Resource) { - tableConcurrency := max(uint64(s.concurrency/minResourceConcurrency), minTableConcurrency) + tableConcurrency := max(s.concurrency/minResourceConcurrency, minTableConcurrency) resourceConcurrency := tableConcurrency * minResourceConcurrency s.tableSems = make([]*semaphore.Weighted, s.maxDepth) diff --git a/scheduler/scheduler_test.go b/scheduler/scheduler_test.go index 37bd9fea56..6eb6f3db01 100644 --- a/scheduler/scheduler_test.go +++ b/scheduler/scheduler_test.go @@ -3,30 +3,23 @@ package scheduler import ( "context" "testing" - "time" "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/array" "github.com/cloudquery/plugin-sdk/v4/scalar" "github.com/cloudquery/plugin-sdk/v4/schema" - "github.com/google/uuid" "github.com/rs/zerolog" ) type testExecutionClient struct { } -func (t *testExecutionClient) ID() string { +func (*testExecutionClient) ID() string { return "test" } var _ schema.ClientMeta = &testExecutionClient{} -var deterministicStableUUID = uuid.MustParse("c25355aab52c5b70a4e0c9991f5a3b87") -var randomStableUUID = uuid.MustParse("00000000000040008000000000000000") - -var testSyncTime = time.Now() - func testResolverSuccess(_ context.Context, _ schema.ClientMeta, _ *schema.Resource, res chan<- any) error { res <- map[string]any{ "TestColumn": 3, @@ -173,10 +166,6 @@ var syncTestCases = []syncTestCase{ table: testTableSuccess(), data: []scalar.Vector{ { - // &scalar.String{Value: "testSource", Valid: true}, - // &scalar.Timestamp{Value: testSyncTime, Valid: true}, - // &scalar.UUID{Value: randomStableUUID, Valid: true}, - // &scalar.UUID{}, &scalar.Int64{Value: 3, Valid: true}, }, }, @@ -196,17 +185,9 @@ var syncTestCases = []syncTestCase{ table: testTableRelationSuccess(), data: []scalar.Vector{ { - // &scalar.String{Value: "testSource", Valid: true}, - // &scalar.Timestamp{Value: testSyncTime, Valid: true}, - // &scalar.UUID{Value: randomStableUUID, Valid: true}, - // &scalar.UUID{}, &scalar.Int64{Value: 3, Valid: true}, }, { - // &scalar.String{Value: "testSource", Valid: true}, - // &scalar.Timestamp{Value: testSyncTime, Valid: true}, - // &scalar.UUID{Value: randomStableUUID, Valid: true}, - // &scalar.UUID{Value: randomStableUUID, Valid: true}, &scalar.Int64{Value: 3, Valid: true}, }, }, @@ -216,10 +197,6 @@ var syncTestCases = []syncTestCase{ table: testTableSuccessWithPK(), data: []scalar.Vector{ { - // &scalar.String{Value: "testSource", Valid: true}, - // &scalar.Timestamp{Value: testSyncTime, Valid: true}, - // &scalar.UUID{Value: deterministicStableUUID, Valid: true}, - // &scalar.UUID{}, &scalar.Int64{Value: 3, Valid: true}, }, }, @@ -240,7 +217,7 @@ func TestScheduler(t *testing.T) { } } -func testSyncTable(t *testing.T, tc syncTestCase, strategy SchedulerStrategy, deterministicCQID bool) { +func testSyncTable(t *testing.T, tc syncTestCase, strategy Strategy, deterministicCQID bool) { ctx := context.Background() tables := []*schema.Table{ tc.table, @@ -249,7 +226,7 @@ func testSyncTable(t *testing.T, tc syncTestCase, strategy SchedulerStrategy, de opts := []Option{ WithLogger(zerolog.New(zerolog.NewTestWriter(t))), WithSchedulerStrategy(strategy), - // WithDeterministicCQId(deterministicCQID), + WithDeterministicCQId(deterministicCQID), } sc := NewScheduler(tables, &c, opts...) records := make(chan arrow.Record, 10) diff --git a/schema/arrow.go b/schema/arrow.go index f7f61dbe61..4baa2a4b86 100644 --- a/schema/arrow.go +++ b/schema/arrow.go @@ -52,14 +52,47 @@ func (s Schemas) Encode() ([][]byte, error) { return ret, nil } +func RecordToBytes(record arrow.Record) ([]byte, error) { + var buf bytes.Buffer + wr := ipc.NewWriter(&buf, ipc.WithSchema(record.Schema())) + if err := wr.Write(record); err != nil { + return nil, err + } + if err := wr.Close(); err != nil { + return nil, err + } + return buf.Bytes(), nil +} + +func NewRecordFromBytes(b []byte) (arrow.Record, error) { + rdr, err := ipc.NewReader(bytes.NewReader(b)) + if err != nil { + return nil, err + } + for rdr.Next() { + rec := rdr.Record() + rec.Retain() + return rec, nil + } + return nil, nil +} + +func NewSchemaFromBytes(b []byte) (*arrow.Schema, error) { + rdr, err := ipc.NewReader(bytes.NewReader(b)) + if err != nil { + return nil, err + } + return rdr.Schema(), nil +} + func NewSchemasFromBytes(b [][]byte) (Schemas, error) { + var err error ret := make([]*arrow.Schema, len(b)) for i, buf := range b { - rdr, err := ipc.NewReader(bytes.NewReader(buf)) + ret[i], err = NewSchemaFromBytes(buf) if err != nil { return nil, err } - ret[i] = rdr.Schema() } return ret, nil } diff --git a/schema/table.go b/schema/table.go index 4475170104..9e84f637b1 100644 --- a/schema/table.go +++ b/schema/table.go @@ -1,11 +1,13 @@ package schema import ( + "bytes" "context" "fmt" "regexp" "github.com/apache/arrow/go/v13/arrow" + "github.com/apache/arrow/go/v13/arrow/ipc" "github.com/cloudquery/plugin-sdk/v4/internal/glob" "golang.org/x/exp/slices" ) @@ -106,6 +108,14 @@ func NewTablesFromArrowSchemas(schemas []*arrow.Schema) (Tables, error) { return tables, nil } +func NewTableFromBytes(b []byte) (*Table, error) { + sc, err := NewSchemaFromBytes(b) + if err != nil { + return nil, err + } + return NewTableFromArrowSchema(sc) +} + // Create a CloudQuery Table abstraction from an arrow schema // arrow schema is a low level representation of a table that can be sent // over the wire in a cross-language way @@ -365,6 +375,15 @@ func (t *Table) PrimaryKeysIndexes() []int { return primaryKeys } +func (t *Table) ToArrowSchemaBytes() ([]byte, error) { + sc := t.ToArrowSchema() + var buf bytes.Buffer + wr := ipc.NewWriter(&buf, ipc.WithSchema(sc)) + if err := wr.Close(); err != nil { + return nil, err + } + return buf.Bytes(), nil +} func (t *Table) ToArrowSchema() *arrow.Schema { fields := make([]arrow.Field, len(t.Columns)) diff --git a/serve/destination_v0_test.go b/serve/destination_v0_test.go index 181474c3c4..ff0ad377d3 100644 --- a/serve/destination_v0_test.go +++ b/serve/destination_v0_test.go @@ -7,7 +7,6 @@ import ( "testing" "time" - "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/array" "github.com/apache/arrow/go/v13/arrow/memory" pbBase "github.com/cloudquery/plugin-pb-go/pb/base/v0" @@ -16,6 +15,7 @@ import ( schemav2 "github.com/cloudquery/plugin-sdk/v2/schema" "github.com/cloudquery/plugin-sdk/v2/testdata" "github.com/cloudquery/plugin-sdk/v4/internal/deprecated" + "github.com/cloudquery/plugin-sdk/v4/internal/memdb" serversDestination "github.com/cloudquery/plugin-sdk/v4/internal/servers/destination/v0" "github.com/cloudquery/plugin-sdk/v4/plugin" "google.golang.org/grpc" @@ -24,7 +24,7 @@ import ( ) func TestDestination(t *testing.T) { - p := plugin.NewPlugin("testDestinationPlugin", "development", plugin.NewMemDBClient) + p := plugin.NewPlugin("testDestinationPlugin", "development", memdb.NewMemDBClient) srv := Plugin(p, WithArgs("serve"), WithDestinationV0V1Server(), WithTestListener()) ctx := context.Background() ctx, cancel := context.WithCancel(ctx) @@ -129,20 +129,20 @@ func TestDestination(t *testing.T) { } // serversDestination table := serversDestination.TableV2ToV3(tableV2) - readCh := make(chan arrow.Record, 1) - if err := p.Sync(ctx, plugin.SyncOptions{ + msgs, err := p.SyncAll(ctx, plugin.SyncOptions{ Tables: []string{tableName}, - }, readCh); err != nil { + }) + if err != nil { t.Fatal(err) } - close(readCh) totalResources := 0 destRecord := serversDestination.CQTypesOneToRecord(memory.DefaultAllocator, destResource.Data, table.ToArrowSchema()) - for resource := range readCh { + for _, msg := range msgs { totalResources++ - if !array.RecordEqual(destRecord, resource) { + m := msg.(*plugin.MessageInsert) + if !array.RecordEqual(destRecord, m.Record) { // diff := destination.RecordDiff(destRecord, resource) - t.Fatalf("expected %v but got %v", destRecord, resource) + t.Fatalf("expected %v but got %v", destRecord, m.Record) } } if totalResources != 1 { diff --git a/serve/destination_v1_test.go b/serve/destination_v1_test.go index 3f15930022..abc789ff2d 100644 --- a/serve/destination_v1_test.go +++ b/serve/destination_v1_test.go @@ -8,11 +8,11 @@ import ( "testing" "time" - "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/array" "github.com/apache/arrow/go/v13/arrow/ipc" pb "github.com/cloudquery/plugin-pb-go/pb/destination/v1" "github.com/cloudquery/plugin-pb-go/specs/v0" + "github.com/cloudquery/plugin-sdk/v4/internal/memdb" "github.com/cloudquery/plugin-sdk/v4/plugin" "github.com/cloudquery/plugin-sdk/v4/schema" "google.golang.org/grpc" @@ -21,7 +21,7 @@ import ( ) func TestDestinationV1(t *testing.T) { - p := plugin.NewPlugin("testDestinationPlugin", "development", plugin.NewMemDBClient) + p := plugin.NewPlugin("testDestinationPlugin", "development", memdb.NewMemDBClient) srv := Plugin(p, WithArgs("serve"), WithDestinationV0V1Server(), WithTestListener()) ctx := context.Background() ctx, cancel := context.WithCancel(ctx) @@ -129,19 +129,20 @@ func TestDestinationV1(t *testing.T) { t.Fatal(err) } // serversDestination - readCh := make(chan arrow.Record, 1) - if err := p.Sync(ctx, plugin.SyncOptions{ + msgs, err := p.SyncAll(ctx, plugin.SyncOptions{ Tables: []string{tableName}, - }, readCh); err != nil { + }) + if err != nil { t.Fatal(err) } - close(readCh) totalResources := 0 - for resource := range readCh { + for _, msg := range msgs { totalResources++ - if !array.RecordEqual(rec, resource) { - diff := plugin.RecordDiff(rec, resource) - t.Fatalf("diff at %d: %s", totalResources, diff) + m := msg.(*plugin.MessageInsert) + if !array.RecordEqual(rec, m.Record) { + // diff := plugin.RecordDiff(rec, resource) + // t.Fatalf("diff at %d: %s", totalResources, diff) + t.Fatalf("expected %v but got %v", rec, m.Record) } } if totalResources != 1 { diff --git a/serve/plugin_test.go b/serve/plugin_test.go index d5357d1cb5..fb49d3f55e 100644 --- a/serve/plugin_test.go +++ b/serve/plugin_test.go @@ -8,7 +8,9 @@ import ( "testing" "github.com/apache/arrow/go/v13/arrow" + "github.com/apache/arrow/go/v13/arrow/array" "github.com/apache/arrow/go/v13/arrow/ipc" + "github.com/apache/arrow/go/v13/arrow/memory" pb "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" "github.com/cloudquery/plugin-sdk/v4/internal/memdb" "github.com/cloudquery/plugin-sdk/v4/plugin" @@ -60,6 +62,10 @@ func TestPluginServe(t *testing.T) { t.Fatalf("Expected version to be v1.0.0 but got %s", getVersionResponse.Version) } + if _, err := c.Init(ctx, &pb.Init_Request{}); err != nil { + t.Fatal(err) + } + getTablesRes, err := c.GetTables(ctx, &pb.GetTables_Request{}) if err != nil { t.Fatal(err) @@ -70,14 +76,58 @@ func TestPluginServe(t *testing.T) { t.Fatal(err) } - if len(tables) != 2 { - t.Fatalf("Expected 2 tables but got %d", len(tables)) + if len(tables) != 0 { + t.Fatalf("Expected 0 tables but got %d", len(tables)) } - if _, err := c.Init(ctx, &pb.Init_Request{}); err != nil { + testTable := schema.Table{ + Name: "test_table", + Columns: []schema.Column{ + { + Name: "col1", + Type: arrow.BinaryTypes.String, + }, + }, + } + bldr := array.NewRecordBuilder(memory.DefaultAllocator, testTable.ToArrowSchema()) + bldr.Field(0).(*array.StringBuilder).Append("test") + record := bldr.NewRecord() + recordBytes, err := schema.RecordToBytes(record) + if err != nil { + t.Fatal(err) + } + tableBytes, err := testTable.ToArrowSchemaBytes() + if err != nil { + t.Fatal(err) + } + writeClient, err := c.Write(ctx) + if err != nil { + t.Fatal(err) + } + if err := writeClient.Send(&pb.Write_Request{ + Message: &pb.Write_Request_CreateTable{ + CreateTable: &pb.MessageCreateTable{ + Table: tableBytes, + }, + }, + }); err != nil { + t.Fatal(err) + } + if err := writeClient.Send(&pb.Write_Request{ + Message: &pb.Write_Request_Insert{ + Insert: &pb.MessageInsert{ + Record: recordBytes, + }, + }, + }); err != nil { + t.Fatal(err) + } + if _, err := writeClient.CloseAndRecv(); err != nil { t.Fatal(err) } - syncClient, err := c.Sync(ctx, &pb.Sync_Request{}) + syncClient, err := c.Sync(ctx, &pb.Sync_Request{ + Tables: []string{"test_table"}, + }) if err != nil { t.Fatal(err) } @@ -90,7 +140,8 @@ func TestPluginServe(t *testing.T) { if err != nil { t.Fatal(err) } - rdr, err := ipc.NewReader(bytes.NewReader(r.Resource)) + m := r.Message.(*pb.Sync_Response_Insert) + rdr, err := ipc.NewReader(bytes.NewReader(m.Insert.Record)) if err != nil { t.Fatal(err) } @@ -111,8 +162,8 @@ func TestPluginServe(t *testing.T) { if tableName != "test_table" { t.Fatalf("Expected resource with table name test_table. got: %s", tableName) } - if len(resource.Columns()) != 5 { - t.Fatalf("Expected resource with data length 3 but got %d", len(resource.Columns())) + if len(resource.Columns()) != 1 { + t.Fatalf("Expected resource with data length 1 but got %d", len(resource.Columns())) } totalResources++ } diff --git a/transformers/tables.go b/transformers/tables.go index 9ffbc3dd1f..f8e7c5b46f 100644 --- a/transformers/tables.go +++ b/transformers/tables.go @@ -7,10 +7,10 @@ import ( ) // Set parent links on relational tables -func setParents(tables schema.Tables, parent *schema.Table) { +func SetParents(tables schema.Tables, parent *schema.Table) { for _, table := range tables { table.Parent = parent - setParents(table.Relations, table) + SetParents(table.Relations, table) } } diff --git a/writers/batch_test.go b/writers/batch_test.go index 0ca94fc1ad..dcc38f64aa 100644 --- a/writers/batch_test.go +++ b/writers/batch_test.go @@ -13,7 +13,7 @@ import ( type testBatchClient struct { } -func (c *testBatchClient) WriteTableBatch(ctx context.Context, table *schema.Table, resources []arrow.Record) error { +func (c *testBatchClient) WriteTableBatch(context.Context, *schema.Table, []arrow.Record) error { return nil } From 8a4fe3e57ce398626bec444ce105681dfc89f8ae Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Mon, 12 Jun 2023 23:20:08 +0300 Subject: [PATCH 067/125] fix --- plugin/plugin_reader.go | 1 - serve/state_v3_test.go.backup | 57 ----------------------------------- 2 files changed, 58 deletions(-) delete mode 100644 serve/state_v3_test.go.backup diff --git a/plugin/plugin_reader.go b/plugin/plugin_reader.go index 4f9d51e66a..0544af3738 100644 --- a/plugin/plugin_reader.go +++ b/plugin/plugin_reader.go @@ -80,7 +80,6 @@ func (p *Plugin) Sync(ctx context.Context, options SyncOptions, res chan<- Messa return fmt.Errorf("plugin already in use") } defer p.mu.Unlock() - // p.syncTime = options.SyncTime // startTime := time.Now() if err := p.client.Sync(ctx, options, res); err != nil { diff --git a/serve/state_v3_test.go.backup b/serve/state_v3_test.go.backup deleted file mode 100644 index f75d53353b..0000000000 --- a/serve/state_v3_test.go.backup +++ /dev/null @@ -1,57 +0,0 @@ -package serve - -import ( - "context" - "sync" - "testing" - - "github.com/cloudquery/plugin-sdk/v4/internal/state" - "github.com/cloudquery/plugin-sdk/v4/plugin" - "google.golang.org/grpc" - "google.golang.org/grpc/credentials/insecure" -) - -func TestStateV3(t *testing.T) { - p := plugin.NewPlugin("memdb", "v1.0.0", plugin.NewMemDBClient) - srv := Plugin(p, WithArgs("serve"), WithTestListener()) - ctx := context.Background() - ctx, cancel := context.WithCancel(ctx) - var wg sync.WaitGroup - wg.Add(1) - var serverErr error - go func() { - defer wg.Done() - serverErr = srv.Serve(ctx) - }() - defer func() { - cancel() - wg.Wait() - }() - - // https://stackoverflow.com/questions/42102496/testing-a-grpc-service - conn, err := grpc.DialContext(ctx, "bufnet", grpc.WithContextDialer(srv.bufPluginDialer), grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithBlock()) - if err != nil { - t.Fatalf("Failed to dial bufnet: %v", err) - } - - stateClient, err := state.NewClient(ctx, "test", conn) - if err != nil { - t.Fatalf("Failed to create state client: %v", err) - } - if err := stateClient.SetKey(ctx, "testKey", "testValue"); err != nil { - t.Fatalf("Failed to set key: %v", err) - } - key, err := stateClient.GetKey(ctx, "testKey") - if err != nil { - t.Fatalf("Failed to get key: %v", err) - } - if key != "testValue" { - t.Fatalf("Unexpected key value: %v", key) - } - - cancel() - wg.Wait() - if serverErr != nil { - t.Fatal(serverErr) - } -} From 5c782417493fa6c21edd11a4790a8bee512b0a28 Mon Sep 17 00:00:00 2001 From: Herman Schaaf Date: Tue, 13 Jun 2023 17:05:37 +0100 Subject: [PATCH 068/125] Add MixedBatchWriter (WIP) --- writers/batch.go | 5 +++ writers/mixed_batch.go | 80 +++++++++++++++++++++++++++++++++++++ writers/mixed_batch_test.go | 69 ++++++++++++++++++++++++++++++++ 3 files changed, 154 insertions(+) create mode 100644 writers/mixed_batch.go create mode 100644 writers/mixed_batch_test.go diff --git a/writers/batch.go b/writers/batch.go index 186643aaf0..1ee914d52f 100644 --- a/writers/batch.go +++ b/writers/batch.go @@ -9,10 +9,15 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/util" "github.com/cloudquery/plugin-sdk/v4/internal/pk" + "github.com/cloudquery/plugin-sdk/v4/plugin" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/rs/zerolog" ) +type Writer interface { + Write(ctx context.Context, res <-chan plugin.Message) error +} + const ( defaultBatchTimeoutSeconds = 20 defaultBatchSize = 10000 diff --git a/writers/mixed_batch.go b/writers/mixed_batch.go new file mode 100644 index 0000000000..547b05e971 --- /dev/null +++ b/writers/mixed_batch.go @@ -0,0 +1,80 @@ +package writers + +import ( + "context" + "sync" + "time" + + "github.com/cloudquery/plugin-sdk/v4/plugin" + "github.com/cloudquery/plugin-sdk/v4/schema" + "github.com/rs/zerolog" +) + +// MixedBatchClient is a client that will receive batches of messages for a mixture of tables. +type MixedBatchClient interface { + CreateTableBatch(ctx context.Context, resources []plugin.MessageCreateTable) error + InsertBatch(ctx context.Context, resources []plugin.MessageInsert) error + DeleteStaleBatch(ctx context.Context, resources []plugin.MessageDeleteStale) error +} + +type MixedBatchWriter struct { + tables schema.Tables + client MixedBatchClient + workers map[string]*worker + workersLock *sync.Mutex + + logger zerolog.Logger + batchTimeout time.Duration + batchSize int + batchSizeBytes int +} + +// Assert at compile-time that MixedBatchWriter implements the Writer interface +var _ Writer = (*MixedBatchWriter)(nil) + +type MixedBatchWriterOption func(writer *MixedBatchWriter) + +func WithMixedBatchWriterLogger(logger zerolog.Logger) MixedBatchWriterOption { + return func(p *MixedBatchWriter) { + p.logger = logger + } +} + +func WithMixedBatchWriterBatchTimeout(timeout time.Duration) MixedBatchWriterOption { + return func(p *MixedBatchWriter) { + p.batchTimeout = timeout + } +} + +func WithMixedBatchWriterBatchSize(size int) MixedBatchWriterOption { + return func(p *MixedBatchWriter) { + p.batchSize = size + } +} + +func WithMixedBatchWriterSizeBytes(size int) MixedBatchWriterOption { + return func(p *MixedBatchWriter) { + p.batchSizeBytes = size + } +} + +func NewMixedBatchWriter(tables schema.Tables, client MixedBatchClient, opts ...MixedBatchWriterOption) (*MixedBatchWriter, error) { + c := &MixedBatchWriter{ + tables: tables, + client: client, + workers: make(map[string]*worker), + workersLock: &sync.Mutex{}, + logger: zerolog.Nop(), + batchTimeout: defaultBatchTimeoutSeconds * time.Second, + batchSize: defaultBatchSize, + batchSizeBytes: defaultBatchSizeBytes, + } + for _, opt := range opts { + opt(c) + } + return c, nil +} + +func (c *MixedBatchWriter) Write(ctx context.Context, res <-chan plugin.Message) error { + return nil // TODO +} diff --git a/writers/mixed_batch_test.go b/writers/mixed_batch_test.go new file mode 100644 index 0000000000..05c505325f --- /dev/null +++ b/writers/mixed_batch_test.go @@ -0,0 +1,69 @@ +package writers + +import ( + "context" + "testing" + + "github.com/apache/arrow/go/v13/arrow" + "github.com/apache/arrow/go/v13/arrow/array" + "github.com/apache/arrow/go/v13/arrow/memory" + "github.com/cloudquery/plugin-sdk/v4/plugin" + "github.com/cloudquery/plugin-sdk/v4/schema" +) + +type testMixedBatchClient struct { +} + +func (c *testMixedBatchClient) CreateTableBatch(ctx context.Context, resources []plugin.MessageCreateTable) error { + return nil +} + +func (c *testMixedBatchClient) InsertBatch(ctx context.Context, resources []plugin.MessageInsert) error { + return nil +} + +func (c *testMixedBatchClient) DeleteStaleBatch(ctx context.Context, resources []plugin.MessageDeleteStale) error { + return nil +} + +func TestMixedBatchWriter(t *testing.T) { + ctx := context.Background() + tables := schema.Tables{ + { + Name: "table1", + Columns: []schema.Column{ + { + Name: "id", + Type: arrow.PrimitiveTypes.Int64, + }, + }, + }, + { + Name: "table2", + Columns: []schema.Column{ + { + Name: "id", + Type: arrow.PrimitiveTypes.Int64, + }, + }, + }, + } + + wr, err := NewMixedBatchWriter(tables, &testMixedBatchClient{}) + if err != nil { + t.Fatal(err) + } + ch := make(chan plugin.Message, 1) + + bldr := array.NewRecordBuilder(memory.DefaultAllocator, tables[0].ToArrowSchema()) + bldr.Field(0).(*array.Int64Builder).Append(1) + rec := bldr.NewRecord() + msg := plugin.MessageInsert{ + Record: rec, + } + ch <- msg + close(ch) + if err := wr.Write(ctx, ch); err != nil { + t.Fatal(err) + } +} From 85e6043c8e123bc75abd9ad7c62be91cf0d22771 Mon Sep 17 00:00:00 2001 From: Herman Schaaf Date: Wed, 14 Jun 2023 10:37:10 +0100 Subject: [PATCH 069/125] workers, but probably won't use this --- writers/mixed_batch.go | 188 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 176 insertions(+), 12 deletions(-) diff --git a/writers/mixed_batch.go b/writers/mixed_batch.go index 547b05e971..c81c4008c5 100644 --- a/writers/mixed_batch.go +++ b/writers/mixed_batch.go @@ -2,31 +2,43 @@ package writers import ( "context" + "reflect" "sync" "time" + "github.com/apache/arrow/go/v13/arrow/util" "github.com/cloudquery/plugin-sdk/v4/plugin" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/rs/zerolog" ) -// MixedBatchClient is a client that will receive batches of messages for a mixture of tables. +const ( + msgTypeCreateTable = iota + msgTypeInsert + msgTypeDeleteStale +) + +var allMsgTypes = []int{msgTypeCreateTable, msgTypeInsert, msgTypeDeleteStale} + +// MixedBatchClient is a client that will receive batches of messages with a mixture of tables. type MixedBatchClient interface { - CreateTableBatch(ctx context.Context, resources []plugin.MessageCreateTable) error - InsertBatch(ctx context.Context, resources []plugin.MessageInsert) error - DeleteStaleBatch(ctx context.Context, resources []plugin.MessageDeleteStale) error + CreateTableBatch(ctx context.Context, messages []plugin.MessageCreateTable) error + InsertBatch(ctx context.Context, messages []plugin.MessageInsert) error + DeleteStaleBatch(ctx context.Context, messages []plugin.MessageDeleteStale) error } type MixedBatchWriter struct { - tables schema.Tables - client MixedBatchClient - workers map[string]*worker - workersLock *sync.Mutex - + tables schema.Tables + client MixedBatchClient logger zerolog.Logger batchTimeout time.Duration batchSize int batchSizeBytes int + + workerCreateTable *mixedBatchWorker[plugin.MessageCreateTable] + workerInsert *mixedBatchWorker[plugin.MessageInsert] + workerDeleteStale *mixedBatchWorker[plugin.MessageDeleteStale] + workersLock *sync.Mutex } // Assert at compile-time that MixedBatchWriter implements the Writer interface @@ -58,16 +70,86 @@ func WithMixedBatchWriterSizeBytes(size int) MixedBatchWriterOption { } } +type mixedBatchWorker[T plugin.Message] struct { + count int + wg *sync.WaitGroup + ch chan T + flush chan chan bool + messages []T + writeFunc func(ctx context.Context, messages []T) error +} + +func newWorker[T plugin.Message](writeFunc func(ctx context.Context, messages []T) error) *mixedBatchWorker[T] { + w := &mixedBatchWorker[T]{ + writeFunc: writeFunc, + messages: make([]T, 0, defaultBatchSize), + count: 0, + ch: make(chan T), + wg: &sync.WaitGroup{}, + } + return w +} + +func (w *mixedBatchWorker[T]) listen(ctx context.Context, ch <-chan T) chan chan bool { + flush := make(chan chan bool, 1) + w.wg.Add(1) + go func() { + defer w.wg.Done() + w.start(ctx, ch, flush) + }() + return flush +} + +func (w *mixedBatchWorker[T]) start(ctx context.Context, ch <-chan T, flush chan chan bool) { + sizeBytes := int64(0) + messages := make([]T, 0) + + for { + select { + case msg, ok := <-ch: + if !ok { + if len(messages) > 0 { + w.writeFunc(ctx, messages) + } + return + } + if uint64(len(messages)) == 1000 || sizeBytes+util.TotalRecordSize(r) > int64(1000) { + w.writeFunc(ctx, messages) + messages = make([]T, 0) + sizeBytes = 0 + } + messages = append(messages, msg) + sizeBytes += util.TotalRecordSize(msg) + case <-time.After(w.batchTimeout): + if len(messages) > 0 { + w.writeFunc(ctx, messages) + messages = make([]T, 0) + sizeBytes = 0 + } + case done := <-flush: + if len(messages) > 0 { + w.writeFunc(ctx, messages) + messages = make([]T, 0) + sizeBytes = 0 + } + done <- true + } + } +} + func NewMixedBatchWriter(tables schema.Tables, client MixedBatchClient, opts ...MixedBatchWriterOption) (*MixedBatchWriter, error) { c := &MixedBatchWriter{ tables: tables, client: client, - workers: make(map[string]*worker), workersLock: &sync.Mutex{}, logger: zerolog.Nop(), batchTimeout: defaultBatchTimeoutSeconds * time.Second, batchSize: defaultBatchSize, batchSizeBytes: defaultBatchSizeBytes, + + workerCreateTable: newWorker[plugin.MessageCreateTable](client.CreateTableBatch), + workerInsert: newWorker[plugin.MessageInsert](client.InsertBatch), + workerDeleteStale: newWorker[plugin.MessageDeleteStale](client.DeleteStaleBatch), } for _, opt := range opts { opt(c) @@ -75,6 +157,88 @@ func NewMixedBatchWriter(tables schema.Tables, client MixedBatchClient, opts ... return c, nil } -func (c *MixedBatchWriter) Write(ctx context.Context, res <-chan plugin.Message) error { - return nil // TODO +// Write starts listening for messages on the msgChan channel and writes them to the client in batches. +func (w *MixedBatchWriter) Write(ctx context.Context, msgChan <-chan plugin.Message) error { + w.workersLock.Lock() + flushCreateTable := w.workerCreateTable.listen(ctx, msgChan) + flushInsert := w.workerInsert.listen(ctx, msgChan) + flushDeleteStale := w.workerDeleteStale.listen(ctx, msgChan) + w.workersLock.Unlock() + + done := make(chan bool) + for msg := range msgChan { + switch v := msg.(type) { + case plugin.MessageCreateTable: + w.workerCreateTable.ch <- v + case plugin.MessageInsert: + flushCreateTable <- done + <-done + flushDeleteStale <- done + <-done + w.workerInsert.ch <- v + case plugin.MessageDeleteStale: + flushCreateTable <- done + <-done + flushInsert <- done + <-done + w.workerDeleteStale.ch <- v + } + } + + flushCreateTable <- done + <-done + + flushInsert <- done + <-done + + flushDeleteStale <- done + <-done + + w.workersLock.Lock() + close(w.workerCreateTable.ch) + close(w.workerInsert.ch) + close(w.workerDeleteStale.ch) + + w.workersLock.Unlock() + return nil +} + +func (w *MixedBatchWriter) flush(ctx context.Context, messageID int, messages []plugin.Message) error { + var err error + switch messageID { + case msgTypeCreateTable: + msgs := make([]plugin.MessageCreateTable, len(messages)) + for i := range messages { + msgs[i] = messages[i].(plugin.MessageCreateTable) + } + err = w.client.CreateTableBatch(ctx, msgs) + case msgTypeInsert: + // TODO: should we remove duplicates here? + w.writeInsert(ctx, messages) + case msgTypeDeleteStale: + w.writeDeleteStale(ctx, messages) + } + if err != nil { + + } + start := time.Now() + batchSize := len(resources) + if err := w.client.WriteTableBatch(ctx, table, resources); err != nil { + w.logger.Err(err).Str("table", table.Name).Int("len", batchSize).Dur("duration", time.Since(start)).Msg("failed to write batch") + } else { + w.logger.Info().Str("table", table.Name).Int("len", batchSize).Dur("duration", time.Since(start)).Msg("batch written successfully") + } +} + +func messageID(msg plugin.Message) int { + switch msg.(type) { + case plugin.MessageCreateTable: + return msgTypeCreateTable + case plugin.MessageInsert: + return msgTypeInsert + case plugin.MessageDeleteStale: + return msgTypeDeleteStale + default: + panic("unknown message type: " + reflect.TypeOf(msg).String()) + } } From 1e31a3adc0cdec0b77558dec0e297be9f13a19be Mon Sep 17 00:00:00 2001 From: Herman Schaaf Date: Wed, 14 Jun 2023 12:16:20 +0100 Subject: [PATCH 070/125] Simplified mixed batch writer --- writers/mixed_batch.go | 244 +++++++++++++++--------------------- writers/mixed_batch_test.go | 189 +++++++++++++++++++++++----- 2 files changed, 261 insertions(+), 172 deletions(-) diff --git a/writers/mixed_batch.go b/writers/mixed_batch.go index c81c4008c5..d3295864ea 100644 --- a/writers/mixed_batch.go +++ b/writers/mixed_batch.go @@ -2,8 +2,6 @@ package writers import ( "context" - "reflect" - "sync" "time" "github.com/apache/arrow/go/v13/arrow/util" @@ -34,11 +32,6 @@ type MixedBatchWriter struct { batchTimeout time.Duration batchSize int batchSizeBytes int - - workerCreateTable *mixedBatchWorker[plugin.MessageCreateTable] - workerInsert *mixedBatchWorker[plugin.MessageInsert] - workerDeleteStale *mixedBatchWorker[plugin.MessageDeleteStale] - workersLock *sync.Mutex } // Assert at compile-time that MixedBatchWriter implements the Writer interface @@ -70,86 +63,14 @@ func WithMixedBatchWriterSizeBytes(size int) MixedBatchWriterOption { } } -type mixedBatchWorker[T plugin.Message] struct { - count int - wg *sync.WaitGroup - ch chan T - flush chan chan bool - messages []T - writeFunc func(ctx context.Context, messages []T) error -} - -func newWorker[T plugin.Message](writeFunc func(ctx context.Context, messages []T) error) *mixedBatchWorker[T] { - w := &mixedBatchWorker[T]{ - writeFunc: writeFunc, - messages: make([]T, 0, defaultBatchSize), - count: 0, - ch: make(chan T), - wg: &sync.WaitGroup{}, - } - return w -} - -func (w *mixedBatchWorker[T]) listen(ctx context.Context, ch <-chan T) chan chan bool { - flush := make(chan chan bool, 1) - w.wg.Add(1) - go func() { - defer w.wg.Done() - w.start(ctx, ch, flush) - }() - return flush -} - -func (w *mixedBatchWorker[T]) start(ctx context.Context, ch <-chan T, flush chan chan bool) { - sizeBytes := int64(0) - messages := make([]T, 0) - - for { - select { - case msg, ok := <-ch: - if !ok { - if len(messages) > 0 { - w.writeFunc(ctx, messages) - } - return - } - if uint64(len(messages)) == 1000 || sizeBytes+util.TotalRecordSize(r) > int64(1000) { - w.writeFunc(ctx, messages) - messages = make([]T, 0) - sizeBytes = 0 - } - messages = append(messages, msg) - sizeBytes += util.TotalRecordSize(msg) - case <-time.After(w.batchTimeout): - if len(messages) > 0 { - w.writeFunc(ctx, messages) - messages = make([]T, 0) - sizeBytes = 0 - } - case done := <-flush: - if len(messages) > 0 { - w.writeFunc(ctx, messages) - messages = make([]T, 0) - sizeBytes = 0 - } - done <- true - } - } -} - func NewMixedBatchWriter(tables schema.Tables, client MixedBatchClient, opts ...MixedBatchWriterOption) (*MixedBatchWriter, error) { c := &MixedBatchWriter{ tables: tables, client: client, - workersLock: &sync.Mutex{}, logger: zerolog.Nop(), batchTimeout: defaultBatchTimeoutSeconds * time.Second, batchSize: defaultBatchSize, batchSizeBytes: defaultBatchSizeBytes, - - workerCreateTable: newWorker[plugin.MessageCreateTable](client.CreateTableBatch), - workerInsert: newWorker[plugin.MessageInsert](client.InsertBatch), - workerDeleteStale: newWorker[plugin.MessageDeleteStale](client.DeleteStaleBatch), } for _, opt := range opts { opt(c) @@ -157,88 +78,129 @@ func NewMixedBatchWriter(tables schema.Tables, client MixedBatchClient, opts ... return c, nil } +func msgID(msg plugin.Message) int { + switch msg.(type) { + case plugin.MessageCreateTable: + return msgTypeCreateTable + case plugin.MessageInsert: + return msgTypeInsert + case plugin.MessageDeleteStale: + return msgTypeDeleteStale + } + panic("unknown message type") +} + // Write starts listening for messages on the msgChan channel and writes them to the client in batches. func (w *MixedBatchWriter) Write(ctx context.Context, msgChan <-chan plugin.Message) error { - w.workersLock.Lock() - flushCreateTable := w.workerCreateTable.listen(ctx, msgChan) - flushInsert := w.workerInsert.listen(ctx, msgChan) - flushDeleteStale := w.workerDeleteStale.listen(ctx, msgChan) - w.workersLock.Unlock() - - done := make(chan bool) + createTable := &batchManager[plugin.MessageCreateTable]{ + batch: make([]plugin.MessageCreateTable, 0, w.batchSize), + writeFunc: w.client.CreateTableBatch, + } + insert := &insertBatchManager{ + batch: make([]plugin.MessageInsert, 0, w.batchSize), + writeFunc: w.client.InsertBatch, + maxBatchSizeBytes: int64(w.batchSizeBytes), + } + deleteStale := &batchManager[plugin.MessageDeleteStale]{ + batch: make([]plugin.MessageDeleteStale, 0, w.batchSize), + writeFunc: w.client.DeleteStaleBatch, + } + flush := func(msgType int) error { + switch msgType { + case msgTypeCreateTable: + return createTable.flush(ctx) + case msgTypeInsert: + return insert.flush(ctx) + case msgTypeDeleteStale: + return deleteStale.flush(ctx) + default: + panic("unknown message type") + } + } + prevMsgType := -1 + var err error for msg := range msgChan { + msgType := msgID(msg) + if prevMsgType != -1 && prevMsgType != msgType { + if err := flush(prevMsgType); err != nil { + return err + } + } + prevMsgType = msgType switch v := msg.(type) { case plugin.MessageCreateTable: - w.workerCreateTable.ch <- v + err = createTable.append(ctx, v) case plugin.MessageInsert: - flushCreateTable <- done - <-done - flushDeleteStale <- done - <-done - w.workerInsert.ch <- v + err = insert.append(ctx, v) case plugin.MessageDeleteStale: - flushCreateTable <- done - <-done - flushInsert <- done - <-done - w.workerDeleteStale.ch <- v + err = deleteStale.append(ctx, v) + default: + panic("unknown message type") + } + if err != nil { + return err } } + return flush(prevMsgType) +} - flushCreateTable <- done - <-done - - flushInsert <- done - <-done +// generic batch manager for most message types +type batchManager[T plugin.Message] struct { + batch []T + writeFunc func(ctx context.Context, messages []T) error +} - flushDeleteStale <- done - <-done +func (m *batchManager[T]) append(ctx context.Context, msg T) error { + if len(m.batch) == cap(m.batch) { + if err := m.flush(ctx); err != nil { + return err + } + } + m.batch = append(m.batch, msg) + return nil +} - w.workersLock.Lock() - close(w.workerCreateTable.ch) - close(w.workerInsert.ch) - close(w.workerDeleteStale.ch) +func (m *batchManager[T]) flush(ctx context.Context) error { + if len(m.batch) == 0 { + return nil + } - w.workersLock.Unlock() + err := m.writeFunc(ctx, m.batch) + if err != nil { + return err + } + m.batch = m.batch[:0] return nil } -func (w *MixedBatchWriter) flush(ctx context.Context, messageID int, messages []plugin.Message) error { - var err error - switch messageID { - case msgTypeCreateTable: - msgs := make([]plugin.MessageCreateTable, len(messages)) - for i := range messages { - msgs[i] = messages[i].(plugin.MessageCreateTable) +// special batch manager for insert messages that also keeps track of the total size of the batch +type insertBatchManager struct { + batch []plugin.MessageInsert + writeFunc func(ctx context.Context, messages []plugin.MessageInsert) error + curBatchSizeBytes int64 + maxBatchSizeBytes int64 +} + +func (m *insertBatchManager) append(ctx context.Context, msg plugin.MessageInsert) error { + if len(m.batch) == cap(m.batch) || m.curBatchSizeBytes+util.TotalRecordSize(msg.Record) > m.maxBatchSizeBytes { + if err := m.flush(ctx); err != nil { + return err } - err = w.client.CreateTableBatch(ctx, msgs) - case msgTypeInsert: - // TODO: should we remove duplicates here? - w.writeInsert(ctx, messages) - case msgTypeDeleteStale: - w.writeDeleteStale(ctx, messages) } - if err != nil { + m.batch = append(m.batch, msg) + m.curBatchSizeBytes += util.TotalRecordSize(msg.Record) + return nil +} +func (m *insertBatchManager) flush(ctx context.Context) error { + if len(m.batch) == 0 { + return nil } - start := time.Now() - batchSize := len(resources) - if err := w.client.WriteTableBatch(ctx, table, resources); err != nil { - w.logger.Err(err).Str("table", table.Name).Int("len", batchSize).Dur("duration", time.Since(start)).Msg("failed to write batch") - } else { - w.logger.Info().Str("table", table.Name).Int("len", batchSize).Dur("duration", time.Since(start)).Msg("batch written successfully") - } -} -func messageID(msg plugin.Message) int { - switch msg.(type) { - case plugin.MessageCreateTable: - return msgTypeCreateTable - case plugin.MessageInsert: - return msgTypeInsert - case plugin.MessageDeleteStale: - return msgTypeDeleteStale - default: - panic("unknown message type: " + reflect.TypeOf(msg).String()) + err := m.writeFunc(ctx, m.batch) + if err != nil { + return err } + m.batch = m.batch[:0] + return nil } diff --git a/writers/mixed_batch_test.go b/writers/mixed_batch_test.go index 05c505325f..620e99c2ed 100644 --- a/writers/mixed_batch_test.go +++ b/writers/mixed_batch_test.go @@ -3,6 +3,7 @@ package writers import ( "context" "testing" + "time" "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/array" @@ -12,58 +13,184 @@ import ( ) type testMixedBatchClient struct { + receivedBatches [][]plugin.Message } -func (c *testMixedBatchClient) CreateTableBatch(ctx context.Context, resources []plugin.MessageCreateTable) error { +func (c *testMixedBatchClient) CreateTableBatch(ctx context.Context, msgs []plugin.MessageCreateTable) error { + m := make([]plugin.Message, len(msgs)) + for i, msg := range msgs { + m[i] = msg + } + c.receivedBatches = append(c.receivedBatches, m) return nil } -func (c *testMixedBatchClient) InsertBatch(ctx context.Context, resources []plugin.MessageInsert) error { +func (c *testMixedBatchClient) InsertBatch(ctx context.Context, msgs []plugin.MessageInsert) error { + m := make([]plugin.Message, len(msgs)) + for i, msg := range msgs { + m[i] = msg + } + c.receivedBatches = append(c.receivedBatches, m) return nil } -func (c *testMixedBatchClient) DeleteStaleBatch(ctx context.Context, resources []plugin.MessageDeleteStale) error { +func (c *testMixedBatchClient) DeleteStaleBatch(ctx context.Context, msgs []plugin.MessageDeleteStale) error { + m := make([]plugin.Message, len(msgs)) + for i, msg := range msgs { + m[i] = msg + } + c.receivedBatches = append(c.receivedBatches, m) return nil } func TestMixedBatchWriter(t *testing.T) { ctx := context.Background() - tables := schema.Tables{ - { - Name: "table1", - Columns: []schema.Column{ - { - Name: "id", - Type: arrow.PrimitiveTypes.Int64, - }, + + // message to create table1 + table1 := &schema.Table{ + Name: "table1", + Columns: []schema.Column{ + { + Name: "id", + Type: arrow.PrimitiveTypes.Int64, }, }, - { - Name: "table2", - Columns: []schema.Column{ - { - Name: "id", - Type: arrow.PrimitiveTypes.Int64, - }, + } + msgCreateTable1 := plugin.MessageCreateTable{ + Table: table1, + MigrateForce: false, + } + + // message to create table2 + table2 := &schema.Table{ + Name: "table2", + Columns: []schema.Column{ + { + Name: "id", + Type: arrow.PrimitiveTypes.Int64, }, }, } + msgCreateTable2 := plugin.MessageCreateTable{ + Table: table2, + MigrateForce: false, + } + + // message to insert into table1 + bldr1 := array.NewRecordBuilder(memory.DefaultAllocator, table1.ToArrowSchema()) + bldr1.Field(0).(*array.Int64Builder).Append(1) + rec1 := bldr1.NewRecord() + msgInsertTable1 := plugin.MessageInsert{ + Record: rec1, + } + + // message to insert into table2 + bldr2 := array.NewRecordBuilder(memory.DefaultAllocator, table1.ToArrowSchema()) + bldr2.Field(0).(*array.Int64Builder).Append(1) + rec2 := bldr2.NewRecord() + msgInsertTable2 := plugin.MessageInsert{ + Record: rec2, + Upsert: false, + } - wr, err := NewMixedBatchWriter(tables, &testMixedBatchClient{}) - if err != nil { - t.Fatal(err) + // message to delete stale from table1 + msgDeleteStale1 := plugin.MessageDeleteStale{ + Table: table1, + SourceName: "my-source", + SyncTime: time.Now(), + } + msgDeleteStale2 := plugin.MessageDeleteStale{ + Table: table1, + SourceName: "my-source", + SyncTime: time.Now(), } - ch := make(chan plugin.Message, 1) - bldr := array.NewRecordBuilder(memory.DefaultAllocator, tables[0].ToArrowSchema()) - bldr.Field(0).(*array.Int64Builder).Append(1) - rec := bldr.NewRecord() - msg := plugin.MessageInsert{ - Record: rec, + testCases := []struct { + name string + messages []plugin.Message + wantBatches [][]plugin.Message + }{ + { + name: "create table, insert, delete stale", + messages: []plugin.Message{ + msgCreateTable1, + msgCreateTable2, + msgInsertTable1, + msgInsertTable2, + msgDeleteStale1, + msgDeleteStale2, + }, + wantBatches: [][]plugin.Message{ + {msgCreateTable1, msgCreateTable2}, + {msgInsertTable1, msgInsertTable2}, + {msgDeleteStale1, msgDeleteStale2}, + }, + }, + { + name: "interleaved messages", + messages: []plugin.Message{ + msgCreateTable1, + msgInsertTable1, + msgDeleteStale1, + msgCreateTable2, + msgInsertTable2, + msgDeleteStale2, + }, + wantBatches: [][]plugin.Message{ + {msgCreateTable1}, + {msgInsertTable1}, + {msgDeleteStale1}, + {msgCreateTable2}, + {msgInsertTable2}, + {msgDeleteStale2}, + }, + }, + { + name: "interleaved messages", + messages: []plugin.Message{ + msgCreateTable1, + msgCreateTable2, + msgInsertTable1, + msgDeleteStale2, + msgInsertTable2, + msgDeleteStale1, + }, + wantBatches: [][]plugin.Message{ + {msgCreateTable1, msgCreateTable2}, + {msgInsertTable1}, + {msgDeleteStale2}, + {msgInsertTable2}, + {msgDeleteStale1}, + }, + }, } - ch <- msg - close(ch) - if err := wr.Write(ctx, ch); err != nil { - t.Fatal(err) + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + tables := schema.Tables([]*schema.Table{table1, table2}) + client := &testMixedBatchClient{ + receivedBatches: make([][]plugin.Message, 0), + } + wr, err := NewMixedBatchWriter(tables, client) + if err != nil { + t.Fatal(err) + } + ch := make(chan plugin.Message, len(tc.messages)) + for _, msg := range tc.messages { + ch <- msg + } + close(ch) + if err := wr.Write(ctx, ch); err != nil { + t.Fatal(err) + } + if len(client.receivedBatches) != len(tc.wantBatches) { + t.Fatalf("got %d batches, want %d", len(client.receivedBatches), len(tc.wantBatches)) + } + for i, wantBatch := range tc.wantBatches { + if len(client.receivedBatches[i]) != len(wantBatch) { + t.Fatalf("got %d messages in batch %d, want %d", len(client.receivedBatches[i]), i, len(wantBatch)) + } + } + }) } } From 6528c5e496cd23cd64ff82dcffa33858e39c883f Mon Sep 17 00:00:00 2001 From: Herman Schaaf Date: Thu, 15 Jun 2023 10:22:45 +0100 Subject: [PATCH 071/125] Update New --- writers/mixed_batch.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/writers/mixed_batch.go b/writers/mixed_batch.go index d3295864ea..d668706862 100644 --- a/writers/mixed_batch.go +++ b/writers/mixed_batch.go @@ -63,9 +63,8 @@ func WithMixedBatchWriterSizeBytes(size int) MixedBatchWriterOption { } } -func NewMixedBatchWriter(tables schema.Tables, client MixedBatchClient, opts ...MixedBatchWriterOption) (*MixedBatchWriter, error) { +func NewMixedBatchWriter(client MixedBatchClient, opts ...MixedBatchWriterOption) (*MixedBatchWriter, error) { c := &MixedBatchWriter{ - tables: tables, client: client, logger: zerolog.Nop(), batchTimeout: defaultBatchTimeoutSeconds * time.Second, From a787638f16f056b3e7b8e22cadbc029a3ac3e02c Mon Sep 17 00:00:00 2001 From: Herman Schaaf Date: Thu, 15 Jun 2023 10:22:58 +0100 Subject: [PATCH 072/125] Add GetTables() to Message interface --- plugin/messages.go | 17 ++++++++++++++++- plugin/plugin.go | 2 +- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/plugin/messages.go b/plugin/messages.go index 43e3eedacb..3cae546e58 100644 --- a/plugin/messages.go +++ b/plugin/messages.go @@ -7,16 +7,29 @@ import ( "github.com/cloudquery/plugin-sdk/v4/schema" ) +type Message interface { + GetTable() *schema.Table +} + type MessageCreateTable struct { Table *schema.Table MigrateForce bool } +func (m MessageCreateTable) GetTable() *schema.Table { + return m.Table +} + type MessageInsert struct { + Table *schema.Table Record arrow.Record Upsert bool } +func (m MessageInsert) GetTable() *schema.Table { + return m.Table +} + // MessageDeleteStale is a pretty specific message which requires the destination to be aware of a CLI use-case // thus it might be deprecated in the future // in favour of MessageDelete or MessageRawQuery @@ -27,7 +40,9 @@ type MessageDeleteStale struct { SyncTime time.Time } -type Message any +func (m MessageDeleteStale) GetTable() *schema.Table { + return m.Table +} type Messages []Message diff --git a/plugin/plugin.go b/plugin/plugin.go index b583e86811..0502aea231 100644 --- a/plugin/plugin.go +++ b/plugin/plugin.go @@ -75,7 +75,7 @@ func maxDepth(tables schema.Tables) uint64 { } // NewPlugin returns a new CloudQuery Plugin with the given name, version and implementation. -// Depending on the options, it can be write only plugin, read only plugin or both. +// Depending on the options, it can be a write-only plugin, read-only plugin, or both. func NewPlugin(name string, version string, newClient NewClientFunc, options ...Option) *Plugin { p := Plugin{ name: name, From 5b8ed22d02a28dc294ba1060cb480a4bc7789b8c Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Thu, 15 Jun 2023 10:41:55 +0300 Subject: [PATCH 073/125] fix batchwriter --- plugin/messages.go | 27 +++++ writers/batch.go | 259 ++++++++++++++++++++++++++++-------------- writers/batch_test.go | 190 +++++++++++++++++++++++++++---- 3 files changed, 367 insertions(+), 109 deletions(-) diff --git a/plugin/messages.go b/plugin/messages.go index 3cae546e58..28a9acc019 100644 --- a/plugin/messages.go +++ b/plugin/messages.go @@ -46,6 +46,10 @@ func (m MessageDeleteStale) GetTable() *schema.Table { type Messages []Message +type CreateTables []*MessageCreateTable + +type Inserts []*MessageInsert + func (messages Messages) InsertItems() int64 { items := int64(0) for _, msg := range messages { @@ -56,3 +60,26 @@ func (messages Messages) InsertItems() int64 { } return items } + +func (m CreateTables) Exists(tableName string) bool { + for _, table := range m { + if table.Table.Name == tableName { + return true + } + } + return false +} + +func (m Inserts) Exists(tableName string) bool { + for _, insert := range m { + md := insert.Record.Schema().Metadata() + tableNameMeta, ok := md.GetValue(schema.MetadataTableName) + if !ok { + continue + } + if tableNameMeta == tableName { + return true + } + } + return false +} diff --git a/writers/batch.go b/writers/batch.go index 1ee914d52f..6c67f99549 100644 --- a/writers/batch.go +++ b/writers/batch.go @@ -12,6 +12,7 @@ import ( "github.com/cloudquery/plugin-sdk/v4/plugin" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/rs/zerolog" + "golang.org/x/sync/semaphore" ) type Writer interface { @@ -20,19 +21,25 @@ type Writer interface { const ( defaultBatchTimeoutSeconds = 20 + defaultMaxWorkers = int64(10000) defaultBatchSize = 10000 defaultBatchSizeBytes = 5 * 1024 * 1024 // 5 MiB ) type BatchWriterClient interface { - WriteTableBatch(ctx context.Context, table *schema.Table, resources []arrow.Record) error + CreateTables(context.Context, []*plugin.MessageCreateTable) error + WriteTableBatch(ctx context.Context, name string, upsert bool, msgs []*plugin.MessageInsert) error + DeleteStale(context.Context, []*plugin.MessageDeleteStale) error } type BatchWriter struct { - tables schema.Tables - client BatchWriterClient - workers map[string]*worker - workersLock *sync.Mutex + client BatchWriterClient + semaphore *semaphore.Weighted + workers map[string]*worker + workersLock *sync.RWMutex + workersWaitGroup *sync.WaitGroup + createTableMessages []*plugin.MessageCreateTable + deleteStaleMessages []*plugin.MessageDeleteStale logger zerolog.Logger batchTimeout time.Duration @@ -54,6 +61,12 @@ func WithBatchTimeout(timeout time.Duration) Option { } } +func WithMaxWorkers(n int64) Option { + return func(p *BatchWriter) { + p.semaphore = semaphore.NewWeighted(n) + } +} + func WithBatchSize(size int) Option { return func(p *BatchWriter) { p.batchSize = size @@ -69,56 +82,80 @@ func WithBatchSizeBytes(size int) Option { type worker struct { count int wg *sync.WaitGroup - ch chan arrow.Record + ch chan *plugin.MessageInsert flush chan chan bool } -func NewBatchWriter(tables schema.Tables, client BatchWriterClient, opts ...Option) (*BatchWriter, error) { +func NewBatchWriter(client BatchWriterClient, opts ...Option) (*BatchWriter, error) { c := &BatchWriter{ - tables: tables, - client: client, - workers: make(map[string]*worker), - workersLock: &sync.Mutex{}, - logger: zerolog.Nop(), - batchTimeout: defaultBatchTimeoutSeconds * time.Second, - batchSize: defaultBatchSize, - batchSizeBytes: defaultBatchSizeBytes, + client: client, + workers: make(map[string]*worker), + workersLock: &sync.RWMutex{}, + workersWaitGroup: &sync.WaitGroup{}, + logger: zerolog.Nop(), + batchTimeout: defaultBatchTimeoutSeconds * time.Second, + batchSize: defaultBatchSize, + batchSizeBytes: defaultBatchSizeBytes, + semaphore: semaphore.NewWeighted(defaultMaxWorkers), } for _, opt := range opts { opt(c) } + c.createTableMessages = make([]*plugin.MessageCreateTable, 0, c.batchSize) + c.deleteStaleMessages = make([]*plugin.MessageDeleteStale, 0, c.batchSize) return c, nil } -func (w *BatchWriter) worker(ctx context.Context, table *schema.Table, ch <-chan arrow.Record, flush <-chan chan bool) { +func (w *BatchWriter) Close(ctx context.Context) error { + w.workersLock.Lock() + defer w.workersLock.Unlock() + for _, w := range w.workers { + close(w.ch) + } + w.workersWaitGroup.Wait() + + return nil +} + +func (w *BatchWriter) worker(ctx context.Context, tableName string, ch <-chan *plugin.MessageInsert, flush <-chan chan bool) { sizeBytes := int64(0) - resources := make([]arrow.Record, 0) + resources := make([]*plugin.MessageInsert, 0) + upsertBatch := false for { select { case r, ok := <-ch: if !ok { if len(resources) > 0 { - w.flush(ctx, table, resources) + w.flush(ctx, tableName, upsertBatch, resources) } return } - if uint64(len(resources)) == 1000 || sizeBytes+util.TotalRecordSize(r) > int64(1000) { - w.flush(ctx, table, resources) - resources = make([]arrow.Record, 0) + if upsertBatch != r.Upsert { + w.flush(ctx, tableName, upsertBatch, resources) + resources = make([]*plugin.MessageInsert, 0) + sizeBytes = 0 + upsertBatch = r.Upsert + resources = append(resources, r) + sizeBytes = util.TotalRecordSize(r.Record) + } else { + resources = append(resources, r) + sizeBytes += util.TotalRecordSize(r.Record) + } + if len(resources) >= w.batchSize || sizeBytes+util.TotalRecordSize(r.Record) >= int64(w.batchSizeBytes) { + w.flush(ctx, tableName, upsertBatch, resources) + resources = make([]*plugin.MessageInsert, 0) sizeBytes = 0 } - resources = append(resources, r) - sizeBytes += util.TotalRecordSize(r) case <-time.After(w.batchTimeout): if len(resources) > 0 { - w.flush(ctx, table, resources) - resources = make([]arrow.Record, 0) + w.flush(ctx, tableName, upsertBatch, resources) + resources = make([]*plugin.MessageInsert, 0) sizeBytes = 0 } case done := <-flush: if len(resources) > 0 { - w.flush(ctx, table, resources) - resources = make([]arrow.Record, 0) + w.flush(ctx, tableName, upsertBatch, resources) + resources = make([]*plugin.MessageInsert, 0) sizeBytes = 0 } done <- true @@ -129,14 +166,14 @@ func (w *BatchWriter) worker(ctx context.Context, table *schema.Table, ch <-chan } } -func (w *BatchWriter) flush(ctx context.Context, table *schema.Table, resources []arrow.Record) { - resources = w.removeDuplicatesByPK(table, resources) +func (w *BatchWriter) flush(ctx context.Context, tableName string, upsertBatch bool, resources []*plugin.MessageInsert) { + // resources = w.removeDuplicatesByPK(table, resources) start := time.Now() batchSize := len(resources) - if err := w.client.WriteTableBatch(ctx, table, resources); err != nil { - w.logger.Err(err).Str("table", table.Name).Int("len", batchSize).Dur("duration", time.Since(start)).Msg("failed to write batch") + if err := w.client.WriteTableBatch(ctx, tableName, upsertBatch, resources); err != nil { + w.logger.Err(err).Str("table", tableName).Int("len", batchSize).Dur("duration", time.Since(start)).Msg("failed to write batch") } else { - w.logger.Info().Str("table", table.Name).Int("len", batchSize).Dur("duration", time.Since(start)).Msg("batch written successfully") + w.logger.Info().Str("table", tableName).Int("len", batchSize).Dur("duration", time.Since(start)).Msg("batch written successfully") } } @@ -167,68 +204,122 @@ func (*BatchWriter) removeDuplicatesByPK(table *schema.Table, resources []arrow. return res } -func (w *BatchWriter) Write(ctx context.Context, res <-chan arrow.Record) error { - workers := make(map[string]*worker, len(w.tables)) +func (w *BatchWriter) flushCreateTables(ctx context.Context) error { + if err := w.client.CreateTables(ctx, w.createTableMessages); err != nil { + return err + } + w.createTableMessages = w.createTableMessages[:0] + return nil +} - w.workersLock.Lock() - for _, table := range w.tables { - table := table - if w.workers[table.Name] == nil { - ch := make(chan arrow.Record) - flush := make(chan chan bool) - wg := &sync.WaitGroup{} - w.workers[table.Name] = &worker{ - count: 1, - ch: ch, - flush: flush, - wg: wg, - } - wg.Add(1) - go func() { - defer wg.Done() - w.worker(ctx, table, ch, flush) - }() - } else { - w.workers[table.Name].count++ - } - // we save this locally because we don't want to access the map after that so we can - // keep the workersLock for as short as possible - workers[table.Name] = w.workers[table.Name] +func (w *BatchWriter) flushDeleteStaleTables(ctx context.Context) error { + if err := w.client.DeleteStale(ctx, w.deleteStaleMessages); err != nil { + return err } - w.workersLock.Unlock() + w.deleteStaleMessages = w.deleteStaleMessages[:0] + return nil +} - for r := range res { - tableName, ok := r.Schema().Metadata().GetValue(schema.MetadataTableName) - if !ok { - return fmt.Errorf("missing table name in record metadata") - } - if _, ok := workers[tableName]; !ok { - return fmt.Errorf("table %s not found in destination", tableName) - } - workers[tableName].ch <- r +func (w *BatchWriter) flushInsert(ctx context.Context, tableName string) { + w.workersLock.RLock() + worker, ok := w.workers[tableName] + if !ok { + w.workersLock.RUnlock() + // no tables to flush + return } + w.workersLock.RUnlock() + ch := make(chan bool) + worker.flush <- ch + <-ch +} - // flush and wait for all workers to finish flush before finish and calling delete stale - // This is because destinations can be longed lived and called from multiple sources - flushChannels := make(map[string]chan bool, len(workers)) - for tableName, w := range workers { - flushCh := make(chan bool) - flushChannels[tableName] = flushCh - w.flush <- flushCh +func (w *BatchWriter) writeAll(ctx context.Context, msgs []plugin.Message) error { + ch := make(chan plugin.Message, len(msgs)) + for _, msg := range msgs { + ch <- msg } - for tableName := range flushChannels { - <-flushChannels[tableName] + close(ch) + return w.Write(ctx, ch) +} + +func (w *BatchWriter) Write(ctx context.Context, msgs <-chan plugin.Message) error { + for msg := range msgs { + switch m := msg.(type) { + case *plugin.MessageDeleteStale: + if len(w.createTableMessages) > 0 { + if err := w.flushCreateTables(ctx); err != nil { + return err + } + } + w.flushInsert(ctx, m.Table.Name) + w.deleteStaleMessages = append(w.deleteStaleMessages, m) + if len(w.deleteStaleMessages) > w.batchSize { + if err := w.flushDeleteStaleTables(ctx); err != nil { + return err + } + } + case *plugin.MessageInsert: + if len(w.createTableMessages) > 0 { + if err := w.flushCreateTables(ctx); err != nil { + return err + } + } + if len(w.deleteStaleMessages) > 0 { + if err := w.flushDeleteStaleTables(ctx); err != nil { + return err + } + } + if err := w.startWorker(ctx, m); err != nil { + return err + } + case *plugin.MessageCreateTable: + w.flushInsert(ctx, m.Table.Name) + if len(w.deleteStaleMessages) > 0 { + if err := w.flushDeleteStaleTables(ctx); err != nil { + return err + } + } + w.createTableMessages = append(w.createTableMessages, m) + if len(w.createTableMessages) > w.batchSize { + if err := w.flushCreateTables(ctx); err != nil { + return err + } + } + } } + return nil +} +func (w *BatchWriter) startWorker(ctx context.Context, msg *plugin.MessageInsert) error { + w.workersLock.RLock() + md := msg.Record.Schema().Metadata() + tableName, ok := md.GetValue(schema.MetadataTableName) + if !ok { + w.workersLock.RUnlock() + return fmt.Errorf("table name not found in metadata") + } + wr, ok := w.workers[tableName] + w.workersLock.RUnlock() + if ok { + w.workers[tableName].ch <- msg + return nil + } w.workersLock.Lock() - for tableName := range workers { - w.workers[tableName].count-- - if w.workers[tableName].count == 0 { - close(w.workers[tableName].ch) - w.workers[tableName].wg.Wait() - delete(w.workers, tableName) - } + ch := make(chan *plugin.MessageInsert) + flush := make(chan chan bool) + wr = &worker{ + count: 1, + ch: ch, + flush: flush, } + w.workers[tableName] = wr w.workersLock.Unlock() + w.workersWaitGroup.Add(1) + go func() { + defer w.workersWaitGroup.Done() + w.worker(ctx, tableName, ch, flush) + }() + ch <- msg return nil } diff --git a/writers/batch_test.go b/writers/batch_test.go index dcc38f64aa..cb51311aeb 100644 --- a/writers/batch_test.go +++ b/writers/batch_test.go @@ -3,54 +3,194 @@ package writers import ( "context" "testing" + "time" "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/array" "github.com/apache/arrow/go/v13/arrow/memory" + "github.com/cloudquery/plugin-sdk/v4/plugin" "github.com/cloudquery/plugin-sdk/v4/schema" ) type testBatchClient struct { + createTables []*plugin.MessageCreateTable + inserts []*plugin.MessageInsert + deleteStales []*plugin.MessageDeleteStale } -func (c *testBatchClient) WriteTableBatch(context.Context, *schema.Table, []arrow.Record) error { +func (c *testBatchClient) CreateTables(_ context.Context, msgs []*plugin.MessageCreateTable) error { + c.createTables = append(c.createTables, msgs...) return nil } -func TestBatchWriter(t *testing.T) { - ctx := context.Background() - tables := schema.Tables{ - { - Name: "table1", - Columns: []schema.Column{ - { - Name: "id", - Type: arrow.PrimitiveTypes.Int64, - }, +func (c *testBatchClient) WriteTableBatch(_ context.Context, _ string, _ bool, msgs []*plugin.MessageInsert) error { + c.inserts = append(c.inserts, msgs...) + return nil +} +func (c *testBatchClient) DeleteStale(_ context.Context, msgs []*plugin.MessageDeleteStale) error { + c.deleteStales = append(c.deleteStales, msgs...) + return nil +} + +var batchTestTables = schema.Tables{ + { + Name: "table1", + Columns: []schema.Column{ + { + Name: "id", + Type: arrow.PrimitiveTypes.Int64, }, }, - { - Name: "table2", - Columns: []schema.Column{ - { - Name: "id", - Type: arrow.PrimitiveTypes.Int64, - }, + }, + { + Name: "table2", + Columns: []schema.Column{ + { + Name: "id", + Type: arrow.PrimitiveTypes.Int64, }, }, - } + }, +} + +// TestBatchFlushDifferentMessages tests that if writer receives a message of a new type all other pending +// batches are flushed. +func TestBatchFlushDifferentMessages(t *testing.T) { + ctx := context.Background() - wr, err := NewBatchWriter(tables, &testBatchClient{}) + testClient := &testBatchClient{} + wr, err := NewBatchWriter(testClient) if err != nil { t.Fatal(err) } - ch := make(chan arrow.Record, 1) - bldr := array.NewRecordBuilder(memory.DefaultAllocator, tables[0].ToArrowSchema()) + bldr := array.NewRecordBuilder(memory.DefaultAllocator, batchTestTables[0].ToArrowSchema()) bldr.Field(0).(*array.Int64Builder).Append(1) - ch <- bldr.NewRecord() - close(ch) - if err := wr.Write(ctx, ch); err != nil { + record := bldr.NewRecord() + if err := wr.writeAll(ctx, []plugin.Message{&plugin.MessageCreateTable{Table: batchTestTables[0]}}); err != nil { + t.Fatal(err) + } + if len(testClient.createTables) != 0 { + t.Fatalf("expected 0 create table messages, got %d", len(testClient.createTables)) + } + if err := wr.writeAll(ctx, []plugin.Message{&plugin.MessageInsert{Record: record}}); err != nil { + t.Fatal(err) + } + if len(testClient.createTables) != 1 { + t.Fatalf("expected 1 create table messages, got %d", len(testClient.createTables)) + } + + if len(testClient.inserts) != 0 { + t.Fatalf("expected 0 insert messages, got %d", len(testClient.inserts)) + } + + if err := wr.writeAll(ctx, []plugin.Message{&plugin.MessageCreateTable{Table: batchTestTables[0]}}); err != nil { + t.Fatal(err) + } + + if len(testClient.inserts) != 1 { + t.Fatalf("expected 1 insert messages, got %d", len(testClient.inserts)) + } +} + +func TestBatchSize(t *testing.T) { + ctx := context.Background() + + testClient := &testBatchClient{} + wr, err := NewBatchWriter(testClient, WithBatchSize(2)) + if err != nil { + t.Fatal(err) + } + table := schema.Table{Name: "table1", Columns: []schema.Column{{Name: "id", Type: arrow.PrimitiveTypes.Int64}}} + record := array.NewRecord(table.ToArrowSchema(), nil, 0) + if err := wr.writeAll(ctx, []plugin.Message{&plugin.MessageInsert{ + Record: record, + }}); err != nil { + t.Fatal(err) + } + + if len(testClient.inserts) != 0 { + t.Fatalf("expected 0 create table messages, got %d", len(testClient.inserts)) + } + + if err := wr.writeAll(ctx, []plugin.Message{&plugin.MessageInsert{ + Record: record, + }}); err != nil { t.Fatal(err) } + // we need to wait for the batch to be flushed + time.Sleep(time.Second * 2) + + if len(testClient.inserts) != 2 { + t.Fatalf("expected 2 create table messages, got %d", len(testClient.inserts)) + } +} + +func TestBatchTimeout(t *testing.T) { + ctx := context.Background() + + testClient := &testBatchClient{} + wr, err := NewBatchWriter(testClient, WithBatchTimeout(time.Second)) + if err != nil { + t.Fatal(err) + } + table := schema.Table{Name: "table1", Columns: []schema.Column{{Name: "id", Type: arrow.PrimitiveTypes.Int64}}} + record := array.NewRecord(table.ToArrowSchema(), nil, 0) + if err := wr.writeAll(ctx, []plugin.Message{&plugin.MessageInsert{ + Record: record, + }}); err != nil { + t.Fatal(err) + } + + if len(testClient.inserts) != 0 { + t.Fatalf("expected 0 create table messages, got %d", len(testClient.inserts)) + } + + // we need to wait for the batch to be flushed + time.Sleep(time.Millisecond * 250) + + if len(testClient.inserts) != 0 { + t.Fatalf("expected 0 create table messages, got %d", len(testClient.inserts)) + } + + // we need to wait for the batch to be flushed + time.Sleep(time.Second * 1) + + if len(testClient.inserts) != 1 { + t.Fatalf("expected 1 create table messages, got %d", len(testClient.inserts)) + } +} + +func TestBatchUpserts(t *testing.T) { + ctx := context.Background() + + testClient := &testBatchClient{} + wr, err := NewBatchWriter(testClient) + if err != nil { + t.Fatal(err) + } + table := schema.Table{Name: "table1", Columns: []schema.Column{{Name: "id", Type: arrow.PrimitiveTypes.Int64}}} + record := array.NewRecord(table.ToArrowSchema(), nil, 0) + if err := wr.writeAll(ctx, []plugin.Message{&plugin.MessageInsert{ + Record: record, + Upsert: true, + }}); err != nil { + t.Fatal(err) + } + + if len(testClient.inserts) != 0 { + t.Fatalf("expected 0 create table messages, got %d", len(testClient.inserts)) + } + + if err := wr.writeAll(ctx, []plugin.Message{&plugin.MessageInsert{ + Record: record, + }}); err != nil { + t.Fatal(err) + } + // we need to wait for the batch to be flushed + time.Sleep(time.Second * 2) + + if len(testClient.inserts) != 1 { + t.Fatalf("expected 1 create table messages, got %d", len(testClient.inserts)) + } } From 1f8480cb95f16a608b6eb5518290ada99479c078 Mon Sep 17 00:00:00 2001 From: Herman Schaaf Date: Thu, 15 Jun 2023 10:28:32 +0100 Subject: [PATCH 074/125] Fix tests --- writers/mixed_batch_test.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/writers/mixed_batch_test.go b/writers/mixed_batch_test.go index 620e99c2ed..b99cbcf4c0 100644 --- a/writers/mixed_batch_test.go +++ b/writers/mixed_batch_test.go @@ -167,11 +167,10 @@ func TestMixedBatchWriter(t *testing.T) { for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { - tables := schema.Tables([]*schema.Table{table1, table2}) client := &testMixedBatchClient{ receivedBatches: make([][]plugin.Message, 0), } - wr, err := NewMixedBatchWriter(tables, client) + wr, err := NewMixedBatchWriter(client) if err != nil { t.Fatal(err) } From 13958e6116bda07f91e9b5aead94b6db662d9406 Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Thu, 15 Jun 2023 12:53:25 +0300 Subject: [PATCH 075/125] make migrate_force part of write options --- internal/memdb/memdb_test.go | 10 +--- .../servers/destination/v0/destinations.go | 14 ++--- .../servers/destination/v1/destinations.go | 14 ++--- internal/servers/plugin/v3/plugin.go | 19 ++++--- plugin/messages.go | 3 +- plugin/plugin_writer.go | 1 + plugin/testing_write.go | 22 ++++---- plugin/testing_write_migrate.go | 54 +++++++++---------- ..._test.go => destination_v0_test.go.backup} | 0 ..._test.go => destination_v1_test.go.backup} | 0 serve/{docs_test.go => docs_test.go.backup} | 4 ++ serve/plugin_test.go | 11 ++++ writers/mixed_batch_test.go | 6 +-- 13 files changed, 87 insertions(+), 71 deletions(-) rename serve/{destination_v0_test.go => destination_v0_test.go.backup} (100%) rename serve/{destination_v1_test.go => destination_v1_test.go.backup} (100%) rename serve/{docs_test.go => docs_test.go.backup} (82%) diff --git a/internal/memdb/memdb_test.go b/internal/memdb/memdb_test.go index 44a95c6b06..64ebcca892 100644 --- a/internal/memdb/memdb_test.go +++ b/internal/memdb/memdb_test.go @@ -13,17 +13,11 @@ func TestPlugin(t *testing.T) { if err := p.Init(ctx, nil); err != nil { t.Fatal(err) } - plugin.PluginTestSuiteRunner( + plugin.TestWriterSuiteRunner( t, p, plugin.PluginTestSuiteTests{ - MigrateStrategy: plugin.MigrateStrategy{ - AddColumn: plugin.MigrateModeForce, - AddColumnNotNull: plugin.MigrateModeForce, - RemoveColumn: plugin.MigrateModeForce, - RemoveColumnNotNull: plugin.MigrateModeForce, - ChangeColumn: plugin.MigrateModeForce, - }, + NonForceMigrations: plugin.NonForceMigrations{}, }, ) } diff --git a/internal/servers/destination/v0/destinations.go b/internal/servers/destination/v0/destinations.go index ad5506e161..d8c5f85c9d 100644 --- a/internal/servers/destination/v0/destinations.go +++ b/internal/servers/destination/v0/destinations.go @@ -65,12 +65,13 @@ func (s *Server) Migrate(ctx context.Context, req *pb.Migrate_Request) (*pb.Migr writeCh := make(chan plugin.Message) eg, ctx := errgroup.WithContext(ctx) eg.Go(func() error { - return s.Plugin.Write(ctx, plugin.WriteOptions{}, writeCh) + return s.Plugin.Write(ctx, plugin.WriteOptions{ + MigrateForce: s.spec.MigrateMode == specs.MigrateModeForced, + }, writeCh) }) for _, table := range tables { writeCh <- &plugin.MessageCreateTable{ - Table: table, - MigrateForce: s.spec.MigrateMode == specs.MigrateModeForced, + Table: table, } } close(writeCh) @@ -118,13 +119,14 @@ func (s *Server) Write2(msg pb.Destination_Write2Server) error { eg, ctx := errgroup.WithContext(msg.Context()) // sourceName := r.Source eg.Go(func() error { - return s.Plugin.Write(ctx, plugin.WriteOptions{}, msgs) + return s.Plugin.Write(ctx, plugin.WriteOptions{ + MigrateForce: s.spec.MigrateMode == specs.MigrateModeForced, + }, msgs) }) for _, table := range tables { msgs <- &plugin.MessageCreateTable{ - Table: table, - MigrateForce: s.spec.MigrateMode == specs.MigrateModeForced, + Table: table, } } diff --git a/internal/servers/destination/v1/destinations.go b/internal/servers/destination/v1/destinations.go index 0bfdb886ca..50578b5a6f 100644 --- a/internal/servers/destination/v1/destinations.go +++ b/internal/servers/destination/v1/destinations.go @@ -63,12 +63,13 @@ func (s *Server) Migrate(ctx context.Context, req *pb.Migrate_Request) (*pb.Migr writeCh := make(chan plugin.Message) eg, ctx := errgroup.WithContext(ctx) eg.Go(func() error { - return s.Plugin.Write(ctx, plugin.WriteOptions{}, writeCh) + return s.Plugin.Write(ctx, plugin.WriteOptions{ + MigrateForce: s.migrateMode == plugin.MigrateModeForce, + }, writeCh) }) for _, table := range tables { writeCh <- &plugin.MessageCreateTable{ - Table: table, - MigrateForce: s.migrateMode == plugin.MigrateModeForce, + Table: table, } } close(writeCh) @@ -114,13 +115,14 @@ func (s *Server) Write(msg pb.Destination_WriteServer) error { eg, ctx := errgroup.WithContext(msg.Context()) eg.Go(func() error { - return s.Plugin.Write(ctx, plugin.WriteOptions{}, msgs) + return s.Plugin.Write(ctx, plugin.WriteOptions{ + MigrateForce: s.spec.MigrateMode == specs.MigrateModeForced, + }, msgs) }) for _, table := range tables { msgs <- &plugin.MessageCreateTable{ - Table: table, - MigrateForce: s.spec.MigrateMode == specs.MigrateModeForced, + Table: table, } } diff --git a/internal/servers/plugin/v3/plugin.go b/internal/servers/plugin/v3/plugin.go index 64e166e9ec..314475e8fb 100644 --- a/internal/servers/plugin/v3/plugin.go +++ b/internal/servers/plugin/v3/plugin.go @@ -112,8 +112,7 @@ func (s *Server) Sync(req *pb.Sync_Request, stream pb.Plugin_SyncServer) error { m.Table.ToArrowSchema() pbMsg.Message = &pb.Sync_Response_CreateTable{ CreateTable: &pb.MessageCreateTable{ - Table: nil, - MigrateForce: m.MigrateForce, + Table: nil, }, } case *plugin.MessageInsert: @@ -162,10 +161,19 @@ func (s *Server) Sync(req *pb.Sync_Request, stream pb.Plugin_SyncServer) error { func (s *Server) Write(msg pb.Plugin_WriteServer) error { msgs := make(chan plugin.Message) - + r, err := msg.Recv() + if err != nil { + return status.Errorf(codes.Internal, "failed to receive msg: %v", err) + } + pbWriteOptions, ok := r.Message.(*pb.Write_Request_Options) + if !ok { + return status.Errorf(codes.Internal, "expected options message, got %T", r.Message) + } eg, ctx := errgroup.WithContext(msg.Context()) eg.Go(func() error { - return s.Plugin.Write(ctx, plugin.WriteOptions{}, msgs) + return s.Plugin.Write(ctx, plugin.WriteOptions{ + MigrateForce: pbWriteOptions.Options.MigrateForce, + }, msgs) }) for { @@ -194,8 +202,7 @@ func (s *Server) Write(msg pb.Plugin_WriteServer) error { break } pluginMessage = &plugin.MessageCreateTable{ - Table: table, - MigrateForce: pbMsg.CreateTable.MigrateForce, + Table: table, } case *pb.Write_Request_Insert: record, err := schema.NewRecordFromBytes(pbMsg.Insert.Record) diff --git a/plugin/messages.go b/plugin/messages.go index 28a9acc019..ae97f3070f 100644 --- a/plugin/messages.go +++ b/plugin/messages.go @@ -12,8 +12,7 @@ type Message interface { } type MessageCreateTable struct { - Table *schema.Table - MigrateForce bool + Table *schema.Table } func (m MessageCreateTable) GetTable() *schema.Table { diff --git a/plugin/plugin_writer.go b/plugin/plugin_writer.go index 5b5d64912a..4cbf9b55df 100644 --- a/plugin/plugin_writer.go +++ b/plugin/plugin_writer.go @@ -5,6 +5,7 @@ import ( ) type WriteOptions struct { + MigrateForce bool } // this function is currently used mostly for testing so it's not a public api diff --git a/plugin/testing_write.go b/plugin/testing_write.go index 03046d0c87..3d8da11e4c 100644 --- a/plugin/testing_write.go +++ b/plugin/testing_write.go @@ -27,13 +27,13 @@ type WriterTestSuite struct { genDatOptions schema.TestSourceOptions } -// MigrateStrategy defines which tests we should include -type MigrateStrategy struct { - AddColumn MigrateMode - AddColumnNotNull MigrateMode - RemoveColumn MigrateMode - RemoveColumnNotNull MigrateMode - ChangeColumn MigrateMode +// NonForceMigrations defines which migrations are supported by the plugin in non-force mode +type NonForceMigrations struct { + AddColumn bool + AddColumnNotNull bool + RemoveColumn bool + RemoveColumnNotNull bool + ChangeColumn bool } type PluginTestSuiteTests struct { @@ -50,9 +50,9 @@ type PluginTestSuiteTests struct { // SkipMigrate skips testing migration SkipMigrate bool - // MigrateStrategy defines which tests should work with force migration + // NonForceMigrations defines which tests should work with force migration // and which should pass with safe migration - MigrateStrategy MigrateStrategy + NonForceMigrations NonForceMigrations } type NewPluginFunc func() *Plugin @@ -123,7 +123,7 @@ func TestWriterSuiteRunner(t *testing.T, p *Plugin, tests PluginTestSuiteTests, if suite.tests.SkipMigrate { t.Skip("skipping " + t.Name()) } - suite.testMigrate(ctx, t, MigrateModeSafe) - suite.testMigrate(ctx, t, MigrateModeForce) + suite.testMigrate(ctx, t, false) + suite.testMigrate(ctx, t, true) }) } diff --git a/plugin/testing_write_migrate.go b/plugin/testing_write_migrate.go index 13c1fede30..6acff5a17d 100644 --- a/plugin/testing_write_migrate.go +++ b/plugin/testing_write_migrate.go @@ -17,8 +17,10 @@ func tableUUIDSuffix() string { return strings.ReplaceAll(uuid.NewString(), "-", "_") } -func (s *WriterTestSuite) migrate(ctx context.Context, target *schema.Table, source *schema.Table, strategy MigrateMode, mode MigrateMode) error { - if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageCreateTable{ +func (s *WriterTestSuite) migrate(ctx context.Context, target *schema.Table, source *schema.Table, supportNonForce bool, writeOptionMigrateForce bool) error { + if err := s.plugin.writeOne(ctx, WriteOptions{ + writeOptionMigrateForce, + }, &MessageCreateTable{ Table: source, }); err != nil { return fmt.Errorf("failed to create table: %w", err) @@ -35,7 +37,9 @@ func (s *WriterTestSuite) migrate(ctx context.Context, target *schema.Table, sou resource1 := schema.GenTestData(source, opts)[0] - if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageInsert{ + if err := s.plugin.writeOne(ctx, WriteOptions{ + MigrateForce: writeOptionMigrateForce, + }, &MessageInsert{ Record: resource1, }); err != nil { return fmt.Errorf("failed to insert record: %w", err) @@ -52,9 +56,8 @@ func (s *WriterTestSuite) migrate(ctx context.Context, target *schema.Table, sou return fmt.Errorf("expected 1 item, got %d", totalItems) } - if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageCreateTable{ - Table: target, - MigrateForce: strategy == MigrateModeForce, + if err := s.plugin.writeOne(ctx, WriteOptions{MigrateForce: writeOptionMigrateForce}, &MessageCreateTable{ + Table: target, }); err != nil { return fmt.Errorf("failed to create table: %w", err) } @@ -71,7 +74,7 @@ func (s *WriterTestSuite) migrate(ctx context.Context, target *schema.Table, sou if err != nil { return fmt.Errorf("failed to sync: %w", err) } - if strategy == MigrateModeSafe || mode == MigrateModeSafe { + if !writeOptionMigrateForce || supportNonForce { totalItems = messages.InsertItems() if totalItems != 2 { return fmt.Errorf("expected 2 item, got %d", totalItems) @@ -89,12 +92,11 @@ func (s *WriterTestSuite) migrate(ctx context.Context, target *schema.Table, sou func (s *WriterTestSuite) testMigrate( ctx context.Context, t *testing.T, - mode MigrateMode, + forceMigrate bool, ) { t.Run("add_column", func(t *testing.T) { - if s.tests.MigrateStrategy.AddColumn == MigrateModeForce && mode == MigrateModeSafe { - t.Skip("skipping as migrate mode is safe") - return + if !forceMigrate && !s.tests.NonForceMigrations.AddColumn { + t.Skip("skipping test: add_column") } tableName := "add_column_" + tableUUIDSuffix() source := &schema.Table{ @@ -111,15 +113,14 @@ func (s *WriterTestSuite) testMigrate( {Name: "bool", Type: arrow.FixedWidthTypes.Boolean}, }, } - if err := s.migrate(ctx, target, source, s.tests.MigrateStrategy.AddColumn, mode); err != nil { + if err := s.migrate(ctx, target, source, s.tests.NonForceMigrations.AddColumn, forceMigrate); err != nil { t.Fatalf("failed to migrate %s: %v", tableName, err) } }) t.Run("add_column_not_null", func(t *testing.T) { - if s.tests.MigrateStrategy.AddColumnNotNull == MigrateModeForce && mode == MigrateModeSafe { - t.Skip("skipping as migrate mode is safe") - return + if !forceMigrate && !s.tests.NonForceMigrations.AddColumnNotNull { + t.Skip("skipping test: add_column_not_null") } tableName := "add_column_not_null_" + tableUUIDSuffix() source := &schema.Table{ @@ -135,15 +136,14 @@ func (s *WriterTestSuite) testMigrate( {Name: "id", Type: types.ExtensionTypes.UUID}, {Name: "bool", Type: arrow.FixedWidthTypes.Boolean, NotNull: true}, }} - if err := s.migrate(ctx, target, source, s.tests.MigrateStrategy.AddColumnNotNull, mode); err != nil { + if err := s.migrate(ctx, target, source, s.tests.NonForceMigrations.AddColumnNotNull, forceMigrate); err != nil { t.Fatalf("failed to migrate add_column_not_null: %v", err) } }) t.Run("remove_column", func(t *testing.T) { - if s.tests.MigrateStrategy.RemoveColumn == MigrateModeForce && mode == MigrateModeSafe { - t.Skip("skipping as migrate mode is safe") - return + if !forceMigrate && !s.tests.NonForceMigrations.RemoveColumn { + t.Skip("skipping test: remove_column") } tableName := "remove_column_" + tableUUIDSuffix() source := &schema.Table{ @@ -157,15 +157,14 @@ func (s *WriterTestSuite) testMigrate( Columns: schema.ColumnList{ {Name: "id", Type: types.ExtensionTypes.UUID}, }} - if err := s.migrate(ctx, target, source, s.tests.MigrateStrategy.RemoveColumn, mode); err != nil { + if err := s.migrate(ctx, target, source, s.tests.NonForceMigrations.RemoveColumn, forceMigrate); err != nil { t.Fatalf("failed to migrate remove_column: %v", err) } }) t.Run("remove_column_not_null", func(t *testing.T) { - if s.tests.MigrateStrategy.RemoveColumnNotNull == MigrateModeForce && mode == MigrateModeSafe { - t.Skip("skipping as migrate mode is safe") - return + if !forceMigrate && !s.tests.NonForceMigrations.RemoveColumnNotNull { + t.Skip("skipping test: remove_column_not_null") } tableName := "remove_column_not_null_" + tableUUIDSuffix() source := &schema.Table{ @@ -180,15 +179,14 @@ func (s *WriterTestSuite) testMigrate( Columns: schema.ColumnList{ {Name: "id", Type: types.ExtensionTypes.UUID}, }} - if err := s.migrate(ctx, target, source, s.tests.MigrateStrategy.RemoveColumnNotNull, mode); err != nil { + if err := s.migrate(ctx, target, source, s.tests.NonForceMigrations.RemoveColumnNotNull, forceMigrate); err != nil { t.Fatalf("failed to migrate remove_column_not_null: %v", err) } }) t.Run("change_column", func(t *testing.T) { - if s.tests.MigrateStrategy.ChangeColumn == MigrateModeForce && mode == MigrateModeSafe { - t.Skip("skipping as migrate mode is safe") - return + if !forceMigrate && !s.tests.NonForceMigrations.ChangeColumn { + t.Skip("skipping test: change_column") } tableName := "change_column_" + tableUUIDSuffix() source := &schema.Table{ @@ -203,7 +201,7 @@ func (s *WriterTestSuite) testMigrate( {Name: "id", Type: types.ExtensionTypes.UUID}, {Name: "bool", Type: arrow.BinaryTypes.String, NotNull: true}, }} - if err := s.migrate(ctx, target, source, s.tests.MigrateStrategy.ChangeColumn, mode); err != nil { + if err := s.migrate(ctx, target, source, s.tests.NonForceMigrations.ChangeColumn, forceMigrate); err != nil { t.Fatalf("failed to migrate change_column: %v", err) } }) diff --git a/serve/destination_v0_test.go b/serve/destination_v0_test.go.backup similarity index 100% rename from serve/destination_v0_test.go rename to serve/destination_v0_test.go.backup diff --git a/serve/destination_v1_test.go b/serve/destination_v1_test.go.backup similarity index 100% rename from serve/destination_v1_test.go rename to serve/destination_v1_test.go.backup diff --git a/serve/docs_test.go b/serve/docs_test.go.backup similarity index 82% rename from serve/docs_test.go rename to serve/docs_test.go.backup index 296c9d438e..8b5b5b8abb 100644 --- a/serve/docs_test.go +++ b/serve/docs_test.go.backup @@ -1,6 +1,7 @@ package serve import ( + "context" "testing" "github.com/cloudquery/plugin-sdk/v4/internal/memdb" @@ -13,6 +14,9 @@ func TestPluginDocs(t *testing.T) { "testPlugin", "v1.0.0", memdb.NewMemDBClient) + if err := p.Init(context.Background(), nil); err != nil { + t.Fatal(err) + } srv := Plugin(p, WithArgs("doc", tmpDir), WithTestListener()) if err := srv.newCmdPluginDoc().Execute(); err != nil { t.Fatal(err) diff --git a/serve/plugin_test.go b/serve/plugin_test.go index fb49d3f55e..161193744f 100644 --- a/serve/plugin_test.go +++ b/serve/plugin_test.go @@ -103,6 +103,17 @@ func TestPluginServe(t *testing.T) { if err != nil { t.Fatal(err) } + + if err := writeClient.Send(&pb.Write_Request{ + Message: &pb.Write_Request_Options{ + Options: &pb.WriteOptions{ + MigrateForce: true, + }, + }, + }); err != nil { + t.Fatal(err) + } + if err := writeClient.Send(&pb.Write_Request{ Message: &pb.Write_Request_CreateTable{ CreateTable: &pb.MessageCreateTable{ diff --git a/writers/mixed_batch_test.go b/writers/mixed_batch_test.go index b99cbcf4c0..82b5fcc8d7 100644 --- a/writers/mixed_batch_test.go +++ b/writers/mixed_batch_test.go @@ -57,8 +57,7 @@ func TestMixedBatchWriter(t *testing.T) { }, } msgCreateTable1 := plugin.MessageCreateTable{ - Table: table1, - MigrateForce: false, + Table: table1, } // message to create table2 @@ -72,8 +71,7 @@ func TestMixedBatchWriter(t *testing.T) { }, } msgCreateTable2 := plugin.MessageCreateTable{ - Table: table2, - MigrateForce: false, + Table: table2, } // message to insert into table1 From 69b76d914da84df01dc49b58880453cb1c96455b Mon Sep 17 00:00:00 2001 From: Herman Schaaf Date: Thu, 15 Jun 2023 11:08:28 +0100 Subject: [PATCH 076/125] Rename --- internal/memdb/memdb_test.go | 2 +- plugin/testing_write.go | 8 ++++---- plugin/testing_write_migrate.go | 25 +++++++++++++------------ 3 files changed, 18 insertions(+), 17 deletions(-) diff --git a/internal/memdb/memdb_test.go b/internal/memdb/memdb_test.go index 64ebcca892..fe240fe58c 100644 --- a/internal/memdb/memdb_test.go +++ b/internal/memdb/memdb_test.go @@ -17,7 +17,7 @@ func TestPlugin(t *testing.T) { t, p, plugin.PluginTestSuiteTests{ - NonForceMigrations: plugin.NonForceMigrations{}, + SafeMigrations: plugin.SafeMigrations{}, }, ) } diff --git a/plugin/testing_write.go b/plugin/testing_write.go index 3d8da11e4c..fd25b2bf2d 100644 --- a/plugin/testing_write.go +++ b/plugin/testing_write.go @@ -27,8 +27,8 @@ type WriterTestSuite struct { genDatOptions schema.TestSourceOptions } -// NonForceMigrations defines which migrations are supported by the plugin in non-force mode -type NonForceMigrations struct { +// SafeMigrations defines which migrations are supported by the plugin in safe migrate mode +type SafeMigrations struct { AddColumn bool AddColumnNotNull bool RemoveColumn bool @@ -50,9 +50,9 @@ type PluginTestSuiteTests struct { // SkipMigrate skips testing migration SkipMigrate bool - // NonForceMigrations defines which tests should work with force migration + // SafeMigrations defines which tests should work with force migration // and which should pass with safe migration - NonForceMigrations NonForceMigrations + SafeMigrations SafeMigrations } type NewPluginFunc func() *Plugin diff --git a/plugin/testing_write_migrate.go b/plugin/testing_write_migrate.go index 6acff5a17d..ab2fb0afc3 100644 --- a/plugin/testing_write_migrate.go +++ b/plugin/testing_write_migrate.go @@ -17,7 +17,7 @@ func tableUUIDSuffix() string { return strings.ReplaceAll(uuid.NewString(), "-", "_") } -func (s *WriterTestSuite) migrate(ctx context.Context, target *schema.Table, source *schema.Table, supportNonForce bool, writeOptionMigrateForce bool) error { +func (s *WriterTestSuite) migrate(ctx context.Context, target *schema.Table, source *schema.Table, supportsSafeMigrate bool, writeOptionMigrateForce bool) error { if err := s.plugin.writeOne(ctx, WriteOptions{ writeOptionMigrateForce, }, &MessageCreateTable{ @@ -74,7 +74,8 @@ func (s *WriterTestSuite) migrate(ctx context.Context, target *schema.Table, sou if err != nil { return fmt.Errorf("failed to sync: %w", err) } - if !writeOptionMigrateForce || supportNonForce { + // if force migration is not required, we don't expect any items to be dropped (so there should be 2 items) + if !writeOptionMigrateForce || supportsSafeMigrate { totalItems = messages.InsertItems() if totalItems != 2 { return fmt.Errorf("expected 2 item, got %d", totalItems) @@ -95,7 +96,7 @@ func (s *WriterTestSuite) testMigrate( forceMigrate bool, ) { t.Run("add_column", func(t *testing.T) { - if !forceMigrate && !s.tests.NonForceMigrations.AddColumn { + if !forceMigrate && !s.tests.SafeMigrations.AddColumn { t.Skip("skipping test: add_column") } tableName := "add_column_" + tableUUIDSuffix() @@ -113,13 +114,13 @@ func (s *WriterTestSuite) testMigrate( {Name: "bool", Type: arrow.FixedWidthTypes.Boolean}, }, } - if err := s.migrate(ctx, target, source, s.tests.NonForceMigrations.AddColumn, forceMigrate); err != nil { + if err := s.migrate(ctx, target, source, s.tests.SafeMigrations.AddColumn, forceMigrate); err != nil { t.Fatalf("failed to migrate %s: %v", tableName, err) } }) t.Run("add_column_not_null", func(t *testing.T) { - if !forceMigrate && !s.tests.NonForceMigrations.AddColumnNotNull { + if !forceMigrate && !s.tests.SafeMigrations.AddColumnNotNull { t.Skip("skipping test: add_column_not_null") } tableName := "add_column_not_null_" + tableUUIDSuffix() @@ -136,13 +137,13 @@ func (s *WriterTestSuite) testMigrate( {Name: "id", Type: types.ExtensionTypes.UUID}, {Name: "bool", Type: arrow.FixedWidthTypes.Boolean, NotNull: true}, }} - if err := s.migrate(ctx, target, source, s.tests.NonForceMigrations.AddColumnNotNull, forceMigrate); err != nil { + if err := s.migrate(ctx, target, source, s.tests.SafeMigrations.AddColumnNotNull, forceMigrate); err != nil { t.Fatalf("failed to migrate add_column_not_null: %v", err) } }) t.Run("remove_column", func(t *testing.T) { - if !forceMigrate && !s.tests.NonForceMigrations.RemoveColumn { + if !forceMigrate && !s.tests.SafeMigrations.RemoveColumn { t.Skip("skipping test: remove_column") } tableName := "remove_column_" + tableUUIDSuffix() @@ -157,13 +158,13 @@ func (s *WriterTestSuite) testMigrate( Columns: schema.ColumnList{ {Name: "id", Type: types.ExtensionTypes.UUID}, }} - if err := s.migrate(ctx, target, source, s.tests.NonForceMigrations.RemoveColumn, forceMigrate); err != nil { + if err := s.migrate(ctx, target, source, s.tests.SafeMigrations.RemoveColumn, forceMigrate); err != nil { t.Fatalf("failed to migrate remove_column: %v", err) } }) t.Run("remove_column_not_null", func(t *testing.T) { - if !forceMigrate && !s.tests.NonForceMigrations.RemoveColumnNotNull { + if !forceMigrate && !s.tests.SafeMigrations.RemoveColumnNotNull { t.Skip("skipping test: remove_column_not_null") } tableName := "remove_column_not_null_" + tableUUIDSuffix() @@ -179,13 +180,13 @@ func (s *WriterTestSuite) testMigrate( Columns: schema.ColumnList{ {Name: "id", Type: types.ExtensionTypes.UUID}, }} - if err := s.migrate(ctx, target, source, s.tests.NonForceMigrations.RemoveColumnNotNull, forceMigrate); err != nil { + if err := s.migrate(ctx, target, source, s.tests.SafeMigrations.RemoveColumnNotNull, forceMigrate); err != nil { t.Fatalf("failed to migrate remove_column_not_null: %v", err) } }) t.Run("change_column", func(t *testing.T) { - if !forceMigrate && !s.tests.NonForceMigrations.ChangeColumn { + if !forceMigrate && !s.tests.SafeMigrations.ChangeColumn { t.Skip("skipping test: change_column") } tableName := "change_column_" + tableUUIDSuffix() @@ -201,7 +202,7 @@ func (s *WriterTestSuite) testMigrate( {Name: "id", Type: types.ExtensionTypes.UUID}, {Name: "bool", Type: arrow.BinaryTypes.String, NotNull: true}, }} - if err := s.migrate(ctx, target, source, s.tests.NonForceMigrations.ChangeColumn, forceMigrate); err != nil { + if err := s.migrate(ctx, target, source, s.tests.SafeMigrations.ChangeColumn, forceMigrate); err != nil { t.Fatalf("failed to migrate change_column: %v", err) } }) From 61746494f072dabebd268d74f2881b973c916474 Mon Sep 17 00:00:00 2001 From: Herman Schaaf Date: Thu, 15 Jun 2023 16:58:12 +0100 Subject: [PATCH 077/125] Add EnablePrimaryKeys --- .../servers/destination/v1/destinations.go | 5 ++ plugin/messages.go | 7 ++- plugin/plugin.go | 8 ++- plugin/plugin_writer.go | 3 +- plugin/testing_upsert.go | 12 +++- plugin/testing_write_migrate.go | 2 +- writers/batch.go | 2 +- writers/mixed_batch.go | 59 +++++++++++-------- writers/mixed_batch_test.go | 22 +++---- 9 files changed, 74 insertions(+), 46 deletions(-) diff --git a/internal/servers/destination/v1/destinations.go b/internal/servers/destination/v1/destinations.go index 50578b5a6f..ce10b3443e 100644 --- a/internal/servers/destination/v1/destinations.go +++ b/internal/servers/destination/v1/destinations.go @@ -153,7 +153,12 @@ func (s *Server) Write(msg pb.Destination_WriteServer) error { for rdr.Next() { rec := rdr.Record() rec.Retain() + table, err := schema.NewTableFromArrowSchema(rec.Schema()) + if err != nil { + return status.Errorf(codes.InvalidArgument, "failed to create table: %v", err) + } msg := &plugin.MessageInsert{ + Table: table, Record: rec, Upsert: s.spec.WriteMode == specs.WriteModeOverwrite || s.spec.WriteMode == specs.WriteModeOverwriteDeleteStale, } diff --git a/plugin/messages.go b/plugin/messages.go index ae97f3070f..d566117e0f 100644 --- a/plugin/messages.go +++ b/plugin/messages.go @@ -20,13 +20,16 @@ func (m MessageCreateTable) GetTable() *schema.Table { } type MessageInsert struct { - Table *schema.Table Record arrow.Record Upsert bool } func (m MessageInsert) GetTable() *schema.Table { - return m.Table + table, err := schema.NewTableFromArrowSchema(m.Record.Schema()) + if err != nil { + panic(err) + } + return table } // MessageDeleteStale is a pretty specific message which requires the destination to be aware of a CLI use-case diff --git a/plugin/plugin.go b/plugin/plugin.go index 0502aea231..2da0447d4c 100644 --- a/plugin/plugin.go +++ b/plugin/plugin.go @@ -9,6 +9,8 @@ import ( "github.com/rs/zerolog" ) +var ErrNotImplemented = fmt.Errorf("not implemented") + type NewClientFunc func(context.Context, zerolog.Logger, any) (Client, error) type Client interface { @@ -21,17 +23,17 @@ type Client interface { type UnimplementedWriter struct{} func (UnimplementedWriter) Write(ctx context.Context, options WriteOptions, res <-chan Message) error { - return fmt.Errorf("not implemented") + return ErrNotImplemented } type UnimplementedSync struct{} func (UnimplementedSync) Sync(ctx context.Context, options SyncOptions, res chan<- Message) error { - return fmt.Errorf("not implemented") + return ErrNotImplemented } func (UnimplementedSync) Tables(ctx context.Context) (schema.Tables, error) { - return nil, fmt.Errorf("not implemented") + return nil, ErrNotImplemented } // Plugin is the base structure required to pass to sdk.serve diff --git a/plugin/plugin_writer.go b/plugin/plugin_writer.go index 4cbf9b55df..e4eadb9437 100644 --- a/plugin/plugin_writer.go +++ b/plugin/plugin_writer.go @@ -5,7 +5,8 @@ import ( ) type WriteOptions struct { - MigrateForce bool + MigrateForce bool + EnablePrimaryKeys bool } // this function is currently used mostly for testing so it's not a public api diff --git a/plugin/testing_upsert.go b/plugin/testing_upsert.go index 55a1c0e82d..fae1aa89c4 100644 --- a/plugin/testing_upsert.go +++ b/plugin/testing_upsert.go @@ -19,7 +19,9 @@ func (s *WriterTestSuite) testUpsert(ctx context.Context) error { {Name: "name", Type: arrow.BinaryTypes.String, PrimaryKey: true}, }, } - if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageCreateTable{ + if err := s.plugin.writeOne(ctx, WriteOptions{ + EnablePrimaryKeys: true, + }, &MessageCreateTable{ Table: table, }); err != nil { return fmt.Errorf("failed to create table: %w", err) @@ -29,7 +31,9 @@ func (s *WriterTestSuite) testUpsert(ctx context.Context) error { bldr.Field(0).(*array.StringBuilder).Append("foo") record := bldr.NewRecord() - if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageInsert{ + if err := s.plugin.writeOne(ctx, WriteOptions{ + EnablePrimaryKeys: true, + }, &MessageInsert{ Record: record, Upsert: true, }); err != nil { @@ -47,7 +51,9 @@ func (s *WriterTestSuite) testUpsert(ctx context.Context) error { return fmt.Errorf("expected 1 item, got %d", totalItems) } - if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageInsert{ + if err := s.plugin.writeOne(ctx, WriteOptions{ + EnablePrimaryKeys: true, + }, &MessageInsert{ Record: record, Upsert: true, }); err != nil { diff --git a/plugin/testing_write_migrate.go b/plugin/testing_write_migrate.go index ab2fb0afc3..83f07f3855 100644 --- a/plugin/testing_write_migrate.go +++ b/plugin/testing_write_migrate.go @@ -19,7 +19,7 @@ func tableUUIDSuffix() string { func (s *WriterTestSuite) migrate(ctx context.Context, target *schema.Table, source *schema.Table, supportsSafeMigrate bool, writeOptionMigrateForce bool) error { if err := s.plugin.writeOne(ctx, WriteOptions{ - writeOptionMigrateForce, + MigrateForce: writeOptionMigrateForce, }, &MessageCreateTable{ Table: source, }); err != nil { diff --git a/writers/batch.go b/writers/batch.go index 6c67f99549..79dedff219 100644 --- a/writers/batch.go +++ b/writers/batch.go @@ -16,7 +16,7 @@ import ( ) type Writer interface { - Write(ctx context.Context, res <-chan plugin.Message) error + Write(ctx context.Context, writeOptions plugin.WriteOptions, res <-chan plugin.Message) error } const ( diff --git a/writers/mixed_batch.go b/writers/mixed_batch.go index d668706862..ae0ad509d9 100644 --- a/writers/mixed_batch.go +++ b/writers/mixed_batch.go @@ -2,6 +2,7 @@ package writers import ( "context" + "reflect" "time" "github.com/apache/arrow/go/v13/arrow/util" @@ -20,9 +21,9 @@ var allMsgTypes = []int{msgTypeCreateTable, msgTypeInsert, msgTypeDeleteStale} // MixedBatchClient is a client that will receive batches of messages with a mixture of tables. type MixedBatchClient interface { - CreateTableBatch(ctx context.Context, messages []plugin.MessageCreateTable) error - InsertBatch(ctx context.Context, messages []plugin.MessageInsert) error - DeleteStaleBatch(ctx context.Context, messages []plugin.MessageDeleteStale) error + CreateTableBatch(ctx context.Context, messages []*plugin.MessageCreateTable, options plugin.WriteOptions) error + InsertBatch(ctx context.Context, messages []*plugin.MessageInsert, options plugin.WriteOptions) error + DeleteStaleBatch(ctx context.Context, messages []*plugin.MessageDeleteStale, options plugin.WriteOptions) error } type MixedBatchWriter struct { @@ -79,30 +80,33 @@ func NewMixedBatchWriter(client MixedBatchClient, opts ...MixedBatchWriterOption func msgID(msg plugin.Message) int { switch msg.(type) { - case plugin.MessageCreateTable: + case plugin.MessageCreateTable, *plugin.MessageCreateTable: return msgTypeCreateTable - case plugin.MessageInsert: + case plugin.MessageInsert, *plugin.MessageInsert: return msgTypeInsert - case plugin.MessageDeleteStale: + case plugin.MessageDeleteStale, *plugin.MessageDeleteStale: return msgTypeDeleteStale } - panic("unknown message type") + panic("unknown message type: " + reflect.TypeOf(msg).Name()) } // Write starts listening for messages on the msgChan channel and writes them to the client in batches. -func (w *MixedBatchWriter) Write(ctx context.Context, msgChan <-chan plugin.Message) error { - createTable := &batchManager[plugin.MessageCreateTable]{ - batch: make([]plugin.MessageCreateTable, 0, w.batchSize), - writeFunc: w.client.CreateTableBatch, +func (w *MixedBatchWriter) Write(ctx context.Context, options plugin.WriteOptions, msgChan <-chan plugin.Message) error { + createTable := &batchManager[*plugin.MessageCreateTable]{ + batch: make([]*plugin.MessageCreateTable, 0, w.batchSize), + writeFunc: w.client.CreateTableBatch, + writeOptions: options, } insert := &insertBatchManager{ - batch: make([]plugin.MessageInsert, 0, w.batchSize), + batch: make([]*plugin.MessageInsert, 0, w.batchSize), writeFunc: w.client.InsertBatch, maxBatchSizeBytes: int64(w.batchSizeBytes), + writeOptions: options, } - deleteStale := &batchManager[plugin.MessageDeleteStale]{ - batch: make([]plugin.MessageDeleteStale, 0, w.batchSize), - writeFunc: w.client.DeleteStaleBatch, + deleteStale := &batchManager[*plugin.MessageDeleteStale]{ + batch: make([]*plugin.MessageDeleteStale, 0, w.batchSize), + writeFunc: w.client.DeleteStaleBatch, + writeOptions: options, } flush := func(msgType int) error { switch msgType { @@ -127,11 +131,11 @@ func (w *MixedBatchWriter) Write(ctx context.Context, msgChan <-chan plugin.Mess } prevMsgType = msgType switch v := msg.(type) { - case plugin.MessageCreateTable: + case *plugin.MessageCreateTable: err = createTable.append(ctx, v) - case plugin.MessageInsert: + case *plugin.MessageInsert: err = insert.append(ctx, v) - case plugin.MessageDeleteStale: + case *plugin.MessageDeleteStale: err = deleteStale.append(ctx, v) default: panic("unknown message type") @@ -140,13 +144,17 @@ func (w *MixedBatchWriter) Write(ctx context.Context, msgChan <-chan plugin.Mess return err } } + if prevMsgType == -1 { + return nil + } return flush(prevMsgType) } // generic batch manager for most message types type batchManager[T plugin.Message] struct { - batch []T - writeFunc func(ctx context.Context, messages []T) error + batch []T + writeFunc func(ctx context.Context, messages []T, options plugin.WriteOptions) error + writeOptions plugin.WriteOptions } func (m *batchManager[T]) append(ctx context.Context, msg T) error { @@ -164,7 +172,7 @@ func (m *batchManager[T]) flush(ctx context.Context) error { return nil } - err := m.writeFunc(ctx, m.batch) + err := m.writeFunc(ctx, m.batch, m.writeOptions) if err != nil { return err } @@ -174,13 +182,14 @@ func (m *batchManager[T]) flush(ctx context.Context) error { // special batch manager for insert messages that also keeps track of the total size of the batch type insertBatchManager struct { - batch []plugin.MessageInsert - writeFunc func(ctx context.Context, messages []plugin.MessageInsert) error + batch []*plugin.MessageInsert + writeFunc func(ctx context.Context, messages []*plugin.MessageInsert, writeOptions plugin.WriteOptions) error curBatchSizeBytes int64 maxBatchSizeBytes int64 + writeOptions plugin.WriteOptions } -func (m *insertBatchManager) append(ctx context.Context, msg plugin.MessageInsert) error { +func (m *insertBatchManager) append(ctx context.Context, msg *plugin.MessageInsert) error { if len(m.batch) == cap(m.batch) || m.curBatchSizeBytes+util.TotalRecordSize(msg.Record) > m.maxBatchSizeBytes { if err := m.flush(ctx); err != nil { return err @@ -196,7 +205,7 @@ func (m *insertBatchManager) flush(ctx context.Context) error { return nil } - err := m.writeFunc(ctx, m.batch) + err := m.writeFunc(ctx, m.batch, m.writeOptions) if err != nil { return err } diff --git a/writers/mixed_batch_test.go b/writers/mixed_batch_test.go index 82b5fcc8d7..610f0bff70 100644 --- a/writers/mixed_batch_test.go +++ b/writers/mixed_batch_test.go @@ -16,7 +16,7 @@ type testMixedBatchClient struct { receivedBatches [][]plugin.Message } -func (c *testMixedBatchClient) CreateTableBatch(ctx context.Context, msgs []plugin.MessageCreateTable) error { +func (c *testMixedBatchClient) CreateTableBatch(ctx context.Context, msgs []*plugin.MessageCreateTable, options plugin.WriteOptions) error { m := make([]plugin.Message, len(msgs)) for i, msg := range msgs { m[i] = msg @@ -25,7 +25,7 @@ func (c *testMixedBatchClient) CreateTableBatch(ctx context.Context, msgs []plug return nil } -func (c *testMixedBatchClient) InsertBatch(ctx context.Context, msgs []plugin.MessageInsert) error { +func (c *testMixedBatchClient) InsertBatch(ctx context.Context, msgs []*plugin.MessageInsert, options plugin.WriteOptions) error { m := make([]plugin.Message, len(msgs)) for i, msg := range msgs { m[i] = msg @@ -34,7 +34,7 @@ func (c *testMixedBatchClient) InsertBatch(ctx context.Context, msgs []plugin.Me return nil } -func (c *testMixedBatchClient) DeleteStaleBatch(ctx context.Context, msgs []plugin.MessageDeleteStale) error { +func (c *testMixedBatchClient) DeleteStaleBatch(ctx context.Context, msgs []*plugin.MessageDeleteStale, options plugin.WriteOptions) error { m := make([]plugin.Message, len(msgs)) for i, msg := range msgs { m[i] = msg @@ -43,6 +43,8 @@ func (c *testMixedBatchClient) DeleteStaleBatch(ctx context.Context, msgs []plug return nil } +var _ MixedBatchClient = (*testMixedBatchClient)(nil) + func TestMixedBatchWriter(t *testing.T) { ctx := context.Background() @@ -56,7 +58,7 @@ func TestMixedBatchWriter(t *testing.T) { }, }, } - msgCreateTable1 := plugin.MessageCreateTable{ + msgCreateTable1 := &plugin.MessageCreateTable{ Table: table1, } @@ -70,7 +72,7 @@ func TestMixedBatchWriter(t *testing.T) { }, }, } - msgCreateTable2 := plugin.MessageCreateTable{ + msgCreateTable2 := &plugin.MessageCreateTable{ Table: table2, } @@ -78,7 +80,7 @@ func TestMixedBatchWriter(t *testing.T) { bldr1 := array.NewRecordBuilder(memory.DefaultAllocator, table1.ToArrowSchema()) bldr1.Field(0).(*array.Int64Builder).Append(1) rec1 := bldr1.NewRecord() - msgInsertTable1 := plugin.MessageInsert{ + msgInsertTable1 := &plugin.MessageInsert{ Record: rec1, } @@ -86,18 +88,18 @@ func TestMixedBatchWriter(t *testing.T) { bldr2 := array.NewRecordBuilder(memory.DefaultAllocator, table1.ToArrowSchema()) bldr2.Field(0).(*array.Int64Builder).Append(1) rec2 := bldr2.NewRecord() - msgInsertTable2 := plugin.MessageInsert{ + msgInsertTable2 := &plugin.MessageInsert{ Record: rec2, Upsert: false, } // message to delete stale from table1 - msgDeleteStale1 := plugin.MessageDeleteStale{ + msgDeleteStale1 := &plugin.MessageDeleteStale{ Table: table1, SourceName: "my-source", SyncTime: time.Now(), } - msgDeleteStale2 := plugin.MessageDeleteStale{ + msgDeleteStale2 := &plugin.MessageDeleteStale{ Table: table1, SourceName: "my-source", SyncTime: time.Now(), @@ -177,7 +179,7 @@ func TestMixedBatchWriter(t *testing.T) { ch <- msg } close(ch) - if err := wr.Write(ctx, ch); err != nil { + if err := wr.Write(ctx, plugin.WriteOptions{}, ch); err != nil { t.Fatal(err) } if len(client.receivedBatches) != len(tc.wantBatches) { From bff252318228ddf3c4ecbb7750a6a8000f10008a Mon Sep 17 00:00:00 2001 From: Herman Schaaf Date: Thu, 15 Jun 2023 16:59:42 +0100 Subject: [PATCH 078/125] Undo table change --- internal/servers/destination/v1/destinations.go | 5 ----- 1 file changed, 5 deletions(-) diff --git a/internal/servers/destination/v1/destinations.go b/internal/servers/destination/v1/destinations.go index ce10b3443e..50578b5a6f 100644 --- a/internal/servers/destination/v1/destinations.go +++ b/internal/servers/destination/v1/destinations.go @@ -153,12 +153,7 @@ func (s *Server) Write(msg pb.Destination_WriteServer) error { for rdr.Next() { rec := rdr.Record() rec.Retain() - table, err := schema.NewTableFromArrowSchema(rec.Schema()) - if err != nil { - return status.Errorf(codes.InvalidArgument, "failed to create table: %v", err) - } msg := &plugin.MessageInsert{ - Table: table, Record: rec, Upsert: s.spec.WriteMode == specs.WriteModeOverwrite || s.spec.WriteMode == specs.WriteModeOverwriteDeleteStale, } From 8d450c36c942dfe461e653e48e21e039cf6727d4 Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Fri, 16 Jun 2023 00:20:48 +0300 Subject: [PATCH 079/125] use read method instead of sync for write testing --- {internal/glob => glob}/LICENSE | 0 {internal/glob => glob}/README.md | 0 {internal/glob => glob}/glob.go | 14 ++++++++++++++ {internal/glob => glob}/glob_test.go | 0 internal/memdb/memdb.go | 11 +++++++++++ plugin/plugin.go | 6 ++++++ plugin/plugin_reader.go | 18 ++++++++++++++++- plugin/plugin_test.go | 6 ++++++ plugin/testing_upsert.go | 20 +++++++++++-------- plugin/testing_write_delete.go | 18 +++++++++-------- plugin/testing_write_insert.go | 29 +++++++++++++++++++--------- plugin/testing_write_migrate.go | 26 +++++++++++++++++-------- schema/table.go | 2 +- writers/batch.go | 13 +++++++++++++ 14 files changed, 128 insertions(+), 35 deletions(-) rename {internal/glob => glob}/LICENSE (100%) rename {internal/glob => glob}/README.md (100%) rename {internal/glob => glob}/glob.go (85%) rename {internal/glob => glob}/glob_test.go (100%) diff --git a/internal/glob/LICENSE b/glob/LICENSE similarity index 100% rename from internal/glob/LICENSE rename to glob/LICENSE diff --git a/internal/glob/README.md b/glob/README.md similarity index 100% rename from internal/glob/README.md rename to glob/README.md diff --git a/internal/glob/glob.go b/glob/glob.go similarity index 85% rename from internal/glob/glob.go rename to glob/glob.go index e67db3be18..b4fd6535db 100644 --- a/internal/glob/glob.go +++ b/glob/glob.go @@ -5,6 +5,20 @@ import "strings" // The character which is treated like a glob const GLOB = "*" +func IncludeTable(name string, tables []string, skipTables []string) bool { + for _, t := range skipTables { + if Glob(t, name) { + return false + } + } + for _, t := range tables { + if Glob(t, name) { + return true + } + } + return false +} + // Glob will test a string pattern, potentially containing globs, against a // subject string. The result is a simple true/false, determining whether or // not the glob pattern matched the subject text. diff --git a/internal/glob/glob_test.go b/glob/glob_test.go similarity index 100% rename from internal/glob/glob_test.go rename to glob/glob_test.go diff --git a/internal/memdb/memdb.go b/internal/memdb/memdb.go index ca202a82be..1b3df0e47e 100644 --- a/internal/memdb/memdb.go +++ b/internal/memdb/memdb.go @@ -84,6 +84,17 @@ func (c *client) ID() string { return "testDestinationMemDB" } +func (c *client) Read(ctx context.Context, table *schema.Table, res chan<- arrow.Record) error { + c.memoryDBLock.RLock() + defer c.memoryDBLock.RUnlock() + + tableName := table.Name + for _, row := range c.memoryDB[tableName] { + res <- row + } + return nil +} + func (c *client) Sync(ctx context.Context, options plugin.SyncOptions, res chan<- plugin.Message) error { c.memoryDBLock.RLock() diff --git a/plugin/plugin.go b/plugin/plugin.go index 2da0447d4c..818b76f158 100644 --- a/plugin/plugin.go +++ b/plugin/plugin.go @@ -5,6 +5,7 @@ import ( "fmt" "sync" + "github.com/apache/arrow/go/v13/arrow" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/rs/zerolog" ) @@ -16,6 +17,7 @@ type NewClientFunc func(context.Context, zerolog.Logger, any) (Client, error) type Client interface { Tables(ctx context.Context) (schema.Tables, error) Sync(ctx context.Context, options SyncOptions, res chan<- Message) error + Read(ctx context.Context, table *schema.Table, res chan<- arrow.Record) error Write(ctx context.Context, options WriteOptions, res <-chan Message) error Close(ctx context.Context) error } @@ -26,6 +28,10 @@ func (UnimplementedWriter) Write(ctx context.Context, options WriteOptions, res return ErrNotImplemented } +func (UnimplementedWriter) Read(ctx context.Context, table *schema.Table, res chan<- arrow.Record) error { + return fmt.Errorf("not implemented") +} + type UnimplementedSync struct{} func (UnimplementedSync) Sync(ctx context.Context, options SyncOptions, res chan<- Message) error { diff --git a/plugin/plugin_reader.go b/plugin/plugin_reader.go index 0544af3738..f6bdd4cbcc 100644 --- a/plugin/plugin_reader.go +++ b/plugin/plugin_reader.go @@ -4,7 +4,8 @@ import ( "context" "fmt" - "github.com/cloudquery/plugin-sdk/v4/internal/glob" + "github.com/apache/arrow/go/v13/arrow" + "github.com/cloudquery/plugin-sdk/v4/glob" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/cloudquery/plugin-sdk/v4/state" "github.com/rs/zerolog" @@ -59,6 +60,21 @@ func NewReadOnlyPlugin(name string, version string, newClient NewReadOnlyClientF return NewPlugin(name, version, newClientWrapper, options...) } +func (p *Plugin) readAll(ctx context.Context, table *schema.Table) ([]arrow.Record, error) { + var err error + ch := make(chan arrow.Record) + go func() { + defer close(ch) + err = p.client.Read(ctx, table, ch) + }() + // nolint:prealloc + var records []arrow.Record + for record := range ch { + records = append(records, record) + } + return records, err +} + func (p *Plugin) SyncAll(ctx context.Context, options SyncOptions) (Messages, error) { var err error ch := make(chan Message) diff --git a/plugin/plugin_test.go b/plugin/plugin_test.go index e33670b6d3..c50456139c 100644 --- a/plugin/plugin_test.go +++ b/plugin/plugin_test.go @@ -4,6 +4,7 @@ import ( "context" "testing" + "github.com/apache/arrow/go/v13/arrow" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/rs/zerolog" ) @@ -22,6 +23,11 @@ func newTestPluginClient(context.Context, zerolog.Logger, any) (Client, error) { func (c *testPluginClient) Tables(ctx context.Context) (schema.Tables, error) { return schema.Tables{}, nil } + +func (c *testPluginClient) Read(ctx context.Context, table *schema.Table, res chan<- arrow.Record) error { + return nil +} + func (c *testPluginClient) Sync(ctx context.Context, options SyncOptions, res chan<- Message) error { for _, msg := range c.messages { res <- msg diff --git a/plugin/testing_upsert.go b/plugin/testing_upsert.go index fae1aa89c4..65199178c2 100644 --- a/plugin/testing_upsert.go +++ b/plugin/testing_upsert.go @@ -40,13 +40,15 @@ func (s *WriterTestSuite) testUpsert(ctx context.Context) error { return fmt.Errorf("failed to insert record: %w", err) } - messages, err := s.plugin.SyncAll(ctx, SyncOptions{ - Tables: []string{tableName}, - }) + // messages, err := s.plugin.SyncAll(ctx, SyncOptions{ + // Tables: []string{tableName}, + // }) + records, err := s.plugin.readAll(ctx, table) if err != nil { return fmt.Errorf("failed to sync: %w", err) } - totalItems := messages.InsertItems() + // totalItems := messages.InsertItems() + totalItems := TotalRows(records) if totalItems != 1 { return fmt.Errorf("expected 1 item, got %d", totalItems) } @@ -60,14 +62,16 @@ func (s *WriterTestSuite) testUpsert(ctx context.Context) error { return fmt.Errorf("failed to insert record: %w", err) } - messages, err = s.plugin.SyncAll(ctx, SyncOptions{ - Tables: []string{tableName}, - }) + // messages, err = s.plugin.SyncAll(ctx, SyncOptions{ + // Tables: []string{tableName}, + // }) + records, err = s.plugin.readAll(ctx, table) if err != nil { return fmt.Errorf("failed to sync: %w", err) } - totalItems = messages.InsertItems() + // totalItems = messages.InsertItems() + totalItems = TotalRows(records) if totalItems != 1 { return fmt.Errorf("expected 1 item, got %d", totalItems) } diff --git a/plugin/testing_write_delete.go b/plugin/testing_write_delete.go index ad569e5baf..bfaa5fc70e 100644 --- a/plugin/testing_write_delete.go +++ b/plugin/testing_write_delete.go @@ -38,13 +38,14 @@ func (s *WriterTestSuite) testDeleteStale(ctx context.Context) error { return fmt.Errorf("failed to insert record: %w", err) } - messages, err := s.plugin.SyncAll(ctx, SyncOptions{ - Tables: []string{tableName}, - }) + // messages, err := s.plugin.SyncAll(ctx, SyncOptions{ + // Tables: []string{tableName}, + // }) + records, err := s.plugin.readAll(ctx, table) if err != nil { return fmt.Errorf("failed to sync: %w", err) } - totalItems := messages.InsertItems() + totalItems := TotalRows(records) if totalItems != 1 { return fmt.Errorf("expected 1 items, got %d", totalItems) @@ -62,13 +63,14 @@ func (s *WriterTestSuite) testDeleteStale(ctx context.Context) error { return fmt.Errorf("failed to delete stale records: %w", err) } - messages, err = s.plugin.SyncAll(ctx, SyncOptions{ - Tables: []string{tableName}, - }) + // messages, err = s.plugin.SyncAll(ctx, SyncOptions{ + // Tables: []string{tableName}, + // }) + records, err = s.plugin.readAll(ctx, table) if err != nil { return fmt.Errorf("failed to sync: %w", err) } - totalItems = messages.InsertItems() + totalItems = TotalRows(records) if totalItems != 1 { return fmt.Errorf("expected 1 item, got %d", totalItems) diff --git a/plugin/testing_write_insert.go b/plugin/testing_write_insert.go index 36004ac173..15f09d161e 100644 --- a/plugin/testing_write_insert.go +++ b/plugin/testing_write_insert.go @@ -11,6 +11,14 @@ import ( "github.com/cloudquery/plugin-sdk/v4/schema" ) +func TotalRows(records []arrow.Record) int64 { + totalRows := int64(0) + for _, record := range records { + totalRows += record.NumRows() + } + return totalRows +} + func (s *WriterTestSuite) testInsert(ctx context.Context) error { tableName := fmt.Sprintf("cq_test_insert_%d", time.Now().Unix()) table := &schema.Table{ @@ -35,14 +43,16 @@ func (s *WriterTestSuite) testInsert(ctx context.Context) error { }); err != nil { return fmt.Errorf("failed to insert record: %w", err) } - - messages, err := s.plugin.SyncAll(ctx, SyncOptions{ - Tables: []string{tableName}, - }) + readRecords, err := s.plugin.readAll(ctx, table) + // messages, err := s.plugin.SyncAll(ctx, SyncOptions{ + // Tables: []string{tableName}, + // }) if err != nil { return fmt.Errorf("failed to sync: %w", err) } - totalItems := messages.InsertItems() + + + totalItems := TotalRows(readRecords) if totalItems != 1 { return fmt.Errorf("expected 1 item, got %d", totalItems) } @@ -53,14 +63,15 @@ func (s *WriterTestSuite) testInsert(ctx context.Context) error { return fmt.Errorf("failed to insert record: %w", err) } - messages, err = s.plugin.SyncAll(ctx, SyncOptions{ - Tables: []string{tableName}, - }) + readRecords, err = s.plugin.readAll(ctx, table) + // messages, err = s.plugin.SyncAll(ctx, SyncOptions{ + // Tables: []string{tableName}, + // }) if err != nil { return fmt.Errorf("failed to sync: %w", err) } - totalItems = messages.InsertItems() + totalItems = TotalRows(readRecords) if totalItems != 2 { return fmt.Errorf("expected 2 item, got %d", totalItems) } diff --git a/plugin/testing_write_migrate.go b/plugin/testing_write_migrate.go index 83f07f3855..eb5b9c80d2 100644 --- a/plugin/testing_write_migrate.go +++ b/plugin/testing_write_migrate.go @@ -45,13 +45,15 @@ func (s *WriterTestSuite) migrate(ctx context.Context, target *schema.Table, sou return fmt.Errorf("failed to insert record: %w", err) } - messages, err := s.plugin.SyncAll(ctx, SyncOptions{ - Tables: []string{source.Name}, - }) + // messages, err := s.plugin.SyncAll(ctx, SyncOptions{ + // Tables: []string{source.Name}, + // }) + records, err := s.plugin.readAll(ctx, source) if err != nil { return fmt.Errorf("failed to sync: %w", err) } - totalItems := messages.InsertItems() + // totalItems := messages.InsertItems() + totalItems := TotalRows(records) if totalItems != 1 { return fmt.Errorf("expected 1 item, got %d", totalItems) } @@ -68,20 +70,28 @@ func (s *WriterTestSuite) migrate(ctx context.Context, target *schema.Table, sou return fmt.Errorf("failed to insert record: %w", err) } - messages, err = s.plugin.SyncAll(ctx, SyncOptions{ - Tables: []string{source.Name}, - }) + // messages, err = s.plugin.SyncAll(ctx, SyncOptions{ + // Tables: []string{source.Name}, + // }) + records, err = s.plugin.readAll(ctx, source) if err != nil { return fmt.Errorf("failed to sync: %w", err) } +<<<<<<< HEAD // if force migration is not required, we don't expect any items to be dropped (so there should be 2 items) if !writeOptionMigrateForce || supportsSafeMigrate { totalItems = messages.InsertItems() +======= + if !writeOptionMigrateForce || supportNonForce { + // totalItems = messages.InsertItems() + totalItems = TotalRows(records) +>>>>>>> a0daa22 (use read method instead of sync for write testing) if totalItems != 2 { return fmt.Errorf("expected 2 item, got %d", totalItems) } } else { - totalItems = messages.InsertItems() + // totalItems = messages.InsertItems() + totalItems = TotalRows(records) if totalItems != 1 { return fmt.Errorf("expected 1 item, got %d", totalItems) } diff --git a/schema/table.go b/schema/table.go index 9e84f637b1..76a7384650 100644 --- a/schema/table.go +++ b/schema/table.go @@ -8,7 +8,7 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/ipc" - "github.com/cloudquery/plugin-sdk/v4/internal/glob" + "github.com/cloudquery/plugin-sdk/v4/glob" "golang.org/x/exp/slices" ) diff --git a/writers/batch.go b/writers/batch.go index 79dedff219..9ece93efc7 100644 --- a/writers/batch.go +++ b/writers/batch.go @@ -106,6 +106,19 @@ func NewBatchWriter(client BatchWriterClient, opts ...Option) (*BatchWriter, err return c, nil } +func (w *BatchWriter) Flush(ctx context.Context) error { + w.workersLock.RLock() + for _, worker := range w.workers { + done := make(chan bool) + worker.flush <- done + <-done + } + w.workersLock.RUnlock() + w.flushCreateTables(ctx) + w.flushDeleteStaleTables(ctx) + return nil +} + func (w *BatchWriter) Close(ctx context.Context) error { w.workersLock.Lock() defer w.workersLock.Unlock() From 188a8e52760c75a5e9772cb23701a9fcb9172ba6 Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Fri, 16 Jun 2023 00:22:55 +0300 Subject: [PATCH 080/125] rebase complete --- plugin/testing_write_migrate.go | 6 ------ 1 file changed, 6 deletions(-) diff --git a/plugin/testing_write_migrate.go b/plugin/testing_write_migrate.go index eb5b9c80d2..168f7fa3e2 100644 --- a/plugin/testing_write_migrate.go +++ b/plugin/testing_write_migrate.go @@ -77,15 +77,9 @@ func (s *WriterTestSuite) migrate(ctx context.Context, target *schema.Table, sou if err != nil { return fmt.Errorf("failed to sync: %w", err) } -<<<<<<< HEAD // if force migration is not required, we don't expect any items to be dropped (so there should be 2 items) if !writeOptionMigrateForce || supportsSafeMigrate { - totalItems = messages.InsertItems() -======= - if !writeOptionMigrateForce || supportNonForce { - // totalItems = messages.InsertItems() totalItems = TotalRows(records) ->>>>>>> a0daa22 (use read method instead of sync for write testing) if totalItems != 2 { return fmt.Errorf("expected 2 item, got %d", totalItems) } From 76b8233642179ac492211c2c38ad563bac400dac Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Fri, 16 Jun 2023 00:33:46 +0300 Subject: [PATCH 081/125] nits --- plugin/testing_upsert.go | 8 -------- plugin/testing_write_delete.go | 7 ------- plugin/testing_write_insert.go | 6 ------ plugin/testing_write_migrate.go | 8 -------- 4 files changed, 29 deletions(-) diff --git a/plugin/testing_upsert.go b/plugin/testing_upsert.go index 65199178c2..3279064382 100644 --- a/plugin/testing_upsert.go +++ b/plugin/testing_upsert.go @@ -40,14 +40,10 @@ func (s *WriterTestSuite) testUpsert(ctx context.Context) error { return fmt.Errorf("failed to insert record: %w", err) } - // messages, err := s.plugin.SyncAll(ctx, SyncOptions{ - // Tables: []string{tableName}, - // }) records, err := s.plugin.readAll(ctx, table) if err != nil { return fmt.Errorf("failed to sync: %w", err) } - // totalItems := messages.InsertItems() totalItems := TotalRows(records) if totalItems != 1 { return fmt.Errorf("expected 1 item, got %d", totalItems) @@ -62,15 +58,11 @@ func (s *WriterTestSuite) testUpsert(ctx context.Context) error { return fmt.Errorf("failed to insert record: %w", err) } - // messages, err = s.plugin.SyncAll(ctx, SyncOptions{ - // Tables: []string{tableName}, - // }) records, err = s.plugin.readAll(ctx, table) if err != nil { return fmt.Errorf("failed to sync: %w", err) } - // totalItems = messages.InsertItems() totalItems = TotalRows(records) if totalItems != 1 { return fmt.Errorf("expected 1 item, got %d", totalItems) diff --git a/plugin/testing_write_delete.go b/plugin/testing_write_delete.go index bfaa5fc70e..86606b7286 100644 --- a/plugin/testing_write_delete.go +++ b/plugin/testing_write_delete.go @@ -8,7 +8,6 @@ import ( "github.com/apache/arrow/go/v13/arrow/array" "github.com/apache/arrow/go/v13/arrow/memory" "github.com/cloudquery/plugin-sdk/v4/schema" - // "github.com/cloudquery/plugin-sdk/v4/types" ) func (s *WriterTestSuite) testDeleteStale(ctx context.Context) error { @@ -38,9 +37,6 @@ func (s *WriterTestSuite) testDeleteStale(ctx context.Context) error { return fmt.Errorf("failed to insert record: %w", err) } - // messages, err := s.plugin.SyncAll(ctx, SyncOptions{ - // Tables: []string{tableName}, - // }) records, err := s.plugin.readAll(ctx, table) if err != nil { return fmt.Errorf("failed to sync: %w", err) @@ -63,9 +59,6 @@ func (s *WriterTestSuite) testDeleteStale(ctx context.Context) error { return fmt.Errorf("failed to delete stale records: %w", err) } - // messages, err = s.plugin.SyncAll(ctx, SyncOptions{ - // Tables: []string{tableName}, - // }) records, err = s.plugin.readAll(ctx, table) if err != nil { return fmt.Errorf("failed to sync: %w", err) diff --git a/plugin/testing_write_insert.go b/plugin/testing_write_insert.go index 15f09d161e..e1b2e08be6 100644 --- a/plugin/testing_write_insert.go +++ b/plugin/testing_write_insert.go @@ -44,9 +44,6 @@ func (s *WriterTestSuite) testInsert(ctx context.Context) error { return fmt.Errorf("failed to insert record: %w", err) } readRecords, err := s.plugin.readAll(ctx, table) - // messages, err := s.plugin.SyncAll(ctx, SyncOptions{ - // Tables: []string{tableName}, - // }) if err != nil { return fmt.Errorf("failed to sync: %w", err) } @@ -64,9 +61,6 @@ func (s *WriterTestSuite) testInsert(ctx context.Context) error { } readRecords, err = s.plugin.readAll(ctx, table) - // messages, err = s.plugin.SyncAll(ctx, SyncOptions{ - // Tables: []string{tableName}, - // }) if err != nil { return fmt.Errorf("failed to sync: %w", err) } diff --git a/plugin/testing_write_migrate.go b/plugin/testing_write_migrate.go index 168f7fa3e2..617f1946e7 100644 --- a/plugin/testing_write_migrate.go +++ b/plugin/testing_write_migrate.go @@ -45,14 +45,10 @@ func (s *WriterTestSuite) migrate(ctx context.Context, target *schema.Table, sou return fmt.Errorf("failed to insert record: %w", err) } - // messages, err := s.plugin.SyncAll(ctx, SyncOptions{ - // Tables: []string{source.Name}, - // }) records, err := s.plugin.readAll(ctx, source) if err != nil { return fmt.Errorf("failed to sync: %w", err) } - // totalItems := messages.InsertItems() totalItems := TotalRows(records) if totalItems != 1 { return fmt.Errorf("expected 1 item, got %d", totalItems) @@ -70,9 +66,6 @@ func (s *WriterTestSuite) migrate(ctx context.Context, target *schema.Table, sou return fmt.Errorf("failed to insert record: %w", err) } - // messages, err = s.plugin.SyncAll(ctx, SyncOptions{ - // Tables: []string{source.Name}, - // }) records, err = s.plugin.readAll(ctx, source) if err != nil { return fmt.Errorf("failed to sync: %w", err) @@ -84,7 +77,6 @@ func (s *WriterTestSuite) migrate(ctx context.Context, target *schema.Table, sou return fmt.Errorf("expected 2 item, got %d", totalItems) } } else { - // totalItems = messages.InsertItems() totalItems = TotalRows(records) if totalItems != 1 { return fmt.Errorf("expected 1 item, got %d", totalItems) From af9e51848c6cfbb258400fe312c6cc9d5990a874 Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Fri, 16 Jun 2023 11:07:03 +0300 Subject: [PATCH 082/125] fix some more tests --- .gitignore | 3 ++- serve/{docs_test.go.backup => docs_test.go} | 6 ++++-- 2 files changed, 6 insertions(+), 3 deletions(-) rename serve/{docs_test.go.backup => docs_test.go} (75%) diff --git a/.gitignore b/.gitignore index d15ff8fe72..605ca47ead 100644 --- a/.gitignore +++ b/.gitignore @@ -8,4 +8,5 @@ config.hcl vendor cover.out .delta.* -bench.json \ No newline at end of file +bench.json +serve/^TestPluginDocs$/ \ No newline at end of file diff --git a/serve/docs_test.go.backup b/serve/docs_test.go similarity index 75% rename from serve/docs_test.go.backup rename to serve/docs_test.go index 8b5b5b8abb..1548e0b1c1 100644 --- a/serve/docs_test.go.backup +++ b/serve/docs_test.go @@ -17,8 +17,10 @@ func TestPluginDocs(t *testing.T) { if err := p.Init(context.Background(), nil); err != nil { t.Fatal(err) } - srv := Plugin(p, WithArgs("doc", tmpDir), WithTestListener()) - if err := srv.newCmdPluginDoc().Execute(); err != nil { + srv := Plugin(p) + cmd := srv.newCmdPluginRoot() + cmd.SetArgs([]string{"doc", tmpDir}) + if err := cmd.Execute(); err != nil { t.Fatal(err) } } From 89c32ef1619cad635a6dff0764fc4c25ed7c7da6 Mon Sep 17 00:00:00 2001 From: Herman Schaaf Date: Fri, 16 Jun 2023 10:54:20 +0100 Subject: [PATCH 083/125] Rename interfaces to Source/Destination, couple of testing bugfixes --- plugin/plugin.go | 19 +++++----- ...plugin_writer.go => plugin_destination.go} | 16 ++++++--- plugin/{plugin_reader.go => plugin_source.go} | 18 +++++----- plugin/testing_upsert.go | 14 +++----- plugin/testing_write.go | 5 --- plugin/testing_write_migrate.go | 35 ++++++++++--------- 6 files changed, 52 insertions(+), 55 deletions(-) rename plugin/{plugin_writer.go => plugin_destination.go} (58%) rename plugin/{plugin_reader.go => plugin_source.go} (83%) diff --git a/plugin/plugin.go b/plugin/plugin.go index 818b76f158..ab79cc633a 100644 --- a/plugin/plugin.go +++ b/plugin/plugin.go @@ -15,30 +15,27 @@ var ErrNotImplemented = fmt.Errorf("not implemented") type NewClientFunc func(context.Context, zerolog.Logger, any) (Client, error) type Client interface { - Tables(ctx context.Context) (schema.Tables, error) - Sync(ctx context.Context, options SyncOptions, res chan<- Message) error - Read(ctx context.Context, table *schema.Table, res chan<- arrow.Record) error - Write(ctx context.Context, options WriteOptions, res <-chan Message) error - Close(ctx context.Context) error + SourceClient + DestinationClient } -type UnimplementedWriter struct{} +type UnimplementedDestination struct{} -func (UnimplementedWriter) Write(ctx context.Context, options WriteOptions, res <-chan Message) error { +func (UnimplementedDestination) Write(ctx context.Context, options WriteOptions, res <-chan Message) error { return ErrNotImplemented } -func (UnimplementedWriter) Read(ctx context.Context, table *schema.Table, res chan<- arrow.Record) error { +func (UnimplementedDestination) Read(ctx context.Context, table *schema.Table, res chan<- arrow.Record) error { return fmt.Errorf("not implemented") } -type UnimplementedSync struct{} +type UnimplementedSource struct{} -func (UnimplementedSync) Sync(ctx context.Context, options SyncOptions, res chan<- Message) error { +func (UnimplementedSource) Sync(ctx context.Context, options SyncOptions, res chan<- Message) error { return ErrNotImplemented } -func (UnimplementedSync) Tables(ctx context.Context) (schema.Tables, error) { +func (UnimplementedSource) Tables(ctx context.Context) (schema.Tables, error) { return nil, ErrNotImplemented } diff --git a/plugin/plugin_writer.go b/plugin/plugin_destination.go similarity index 58% rename from plugin/plugin_writer.go rename to plugin/plugin_destination.go index e4eadb9437..123bdf717f 100644 --- a/plugin/plugin_writer.go +++ b/plugin/plugin_destination.go @@ -2,20 +2,28 @@ package plugin import ( "context" + + "github.com/apache/arrow/go/v13/arrow" + "github.com/cloudquery/plugin-sdk/v4/schema" ) type WriteOptions struct { - MigrateForce bool - EnablePrimaryKeys bool + MigrateForce bool +} + +type DestinationClient interface { + Read(ctx context.Context, table *schema.Table, res chan<- arrow.Record) error + Write(ctx context.Context, options WriteOptions, res <-chan Message) error + Close(ctx context.Context) error } -// this function is currently used mostly for testing so it's not a public api +// writeOne is currently used mostly for testing, so it's not a public api func (p *Plugin) writeOne(ctx context.Context, options WriteOptions, resource Message) error { resources := []Message{resource} return p.WriteAll(ctx, options, resources) } -// this function is currently used mostly for testing so it's not a public api +// WriteAll is currently used mostly for testing, so it's not a public api func (p *Plugin) WriteAll(ctx context.Context, options WriteOptions, resources []Message) error { ch := make(chan Message, len(resources)) for _, resource := range resources { diff --git a/plugin/plugin_reader.go b/plugin/plugin_source.go similarity index 83% rename from plugin/plugin_reader.go rename to plugin/plugin_source.go index f6bdd4cbcc..52d41243c4 100644 --- a/plugin/plugin_reader.go +++ b/plugin/plugin_source.go @@ -19,7 +19,7 @@ type SyncOptions struct { StateBackend state.Client } -type ReadOnlyClient interface { +type SourceClient interface { Tables(ctx context.Context) (schema.Tables, error) Sync(ctx context.Context, options SyncOptions, res chan<- Message) error Close(ctx context.Context) error @@ -39,21 +39,21 @@ func IsTable(name string, includeTablesPattern []string, skipTablesPattern []str return false } -type NewReadOnlyClientFunc func(context.Context, zerolog.Logger, any) (ReadOnlyClient, error) +type NewReadOnlyClientFunc func(context.Context, zerolog.Logger, any) (SourceClient, error) -// NewReadOnlyPlugin returns a new CloudQuery Plugin with the given name, version and implementation. -// this plugin will only support read operations. For ReadWrite plugin use NewPlugin. -func NewReadOnlyPlugin(name string, version string, newClient NewReadOnlyClientFunc, options ...Option) *Plugin { +// NewSourcePlugin returns a new CloudQuery Plugin with the given name, version and implementation. +// Source plugins only support read operations. For Read & Write plugin use NewPlugin. +func NewSourcePlugin(name string, version string, newClient NewReadOnlyClientFunc, options ...Option) *Plugin { newClientWrapper := func(ctx context.Context, logger zerolog.Logger, any any) (Client, error) { - readOnlyClient, err := newClient(ctx, logger, any) + sourceClient, err := newClient(ctx, logger, any) if err != nil { return nil, err } wrapperClient := struct { - ReadOnlyClient - UnimplementedWriter + SourceClient + UnimplementedDestination }{ - ReadOnlyClient: readOnlyClient, + SourceClient: sourceClient, } return wrapperClient, nil } diff --git a/plugin/testing_upsert.go b/plugin/testing_upsert.go index 3279064382..3bacf4474a 100644 --- a/plugin/testing_upsert.go +++ b/plugin/testing_upsert.go @@ -19,9 +19,7 @@ func (s *WriterTestSuite) testUpsert(ctx context.Context) error { {Name: "name", Type: arrow.BinaryTypes.String, PrimaryKey: true}, }, } - if err := s.plugin.writeOne(ctx, WriteOptions{ - EnablePrimaryKeys: true, - }, &MessageCreateTable{ + if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageCreateTable{ Table: table, }); err != nil { return fmt.Errorf("failed to create table: %w", err) @@ -31,9 +29,7 @@ func (s *WriterTestSuite) testUpsert(ctx context.Context) error { bldr.Field(0).(*array.StringBuilder).Append("foo") record := bldr.NewRecord() - if err := s.plugin.writeOne(ctx, WriteOptions{ - EnablePrimaryKeys: true, - }, &MessageInsert{ + if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageInsert{ Record: record, Upsert: true, }); err != nil { @@ -42,16 +38,14 @@ func (s *WriterTestSuite) testUpsert(ctx context.Context) error { records, err := s.plugin.readAll(ctx, table) if err != nil { - return fmt.Errorf("failed to sync: %w", err) + return fmt.Errorf("failed to readAll: %w", err) } totalItems := TotalRows(records) if totalItems != 1 { return fmt.Errorf("expected 1 item, got %d", totalItems) } - if err := s.plugin.writeOne(ctx, WriteOptions{ - EnablePrimaryKeys: true, - }, &MessageInsert{ + if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageInsert{ Record: record, Upsert: true, }); err != nil { diff --git a/plugin/testing_write.go b/plugin/testing_write.go index fd25b2bf2d..2dd9c4b074 100644 --- a/plugin/testing_write.go +++ b/plugin/testing_write.go @@ -76,7 +76,6 @@ func WithTestDataOptions(opts schema.TestSourceOptions) func(o *WriterTestSuite) } func TestWriterSuiteRunner(t *testing.T, p *Plugin, tests PluginTestSuiteTests, opts ...func(o *WriterTestSuite)) { - t.Helper() suite := &WriterTestSuite{ tests: tests, plugin: p, @@ -89,7 +88,6 @@ func TestWriterSuiteRunner(t *testing.T, p *Plugin, tests PluginTestSuiteTests, ctx := context.Background() t.Run("TestUpsert", func(t *testing.T) { - t.Helper() if suite.tests.SkipUpsert { t.Skip("skipping " + t.Name()) } @@ -99,7 +97,6 @@ func TestWriterSuiteRunner(t *testing.T, p *Plugin, tests PluginTestSuiteTests, }) t.Run("TestInsert", func(t *testing.T) { - t.Helper() if suite.tests.SkipInsert { t.Skip("skipping " + t.Name()) } @@ -109,7 +106,6 @@ func TestWriterSuiteRunner(t *testing.T, p *Plugin, tests PluginTestSuiteTests, }) t.Run("TestDeleteStale", func(t *testing.T) { - t.Helper() if suite.tests.SkipDeleteStale { t.Skip("skipping " + t.Name()) } @@ -119,7 +115,6 @@ func TestWriterSuiteRunner(t *testing.T, p *Plugin, tests PluginTestSuiteTests, }) t.Run("TestMigrate", func(t *testing.T) { - t.Helper() if suite.tests.SkipMigrate { t.Skip("skipping " + t.Name()) } diff --git a/plugin/testing_write_migrate.go b/plugin/testing_write_migrate.go index 617f1946e7..56aebe956a 100644 --- a/plugin/testing_write_migrate.go +++ b/plugin/testing_write_migrate.go @@ -37,9 +37,7 @@ func (s *WriterTestSuite) migrate(ctx context.Context, target *schema.Table, sou resource1 := schema.GenTestData(source, opts)[0] - if err := s.plugin.writeOne(ctx, WriteOptions{ - MigrateForce: writeOptionMigrateForce, - }, &MessageInsert{ + if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageInsert{ Record: resource1, }); err != nil { return fmt.Errorf("failed to insert record: %w", err) @@ -60,15 +58,16 @@ func (s *WriterTestSuite) migrate(ctx context.Context, target *schema.Table, sou return fmt.Errorf("failed to create table: %w", err) } + resource2 := schema.GenTestData(target, opts)[0] if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageInsert{ - Record: resource1, + Record: resource2, }); err != nil { return fmt.Errorf("failed to insert record: %w", err) } - records, err = s.plugin.readAll(ctx, source) + records, err = s.plugin.readAll(ctx, target) if err != nil { - return fmt.Errorf("failed to sync: %w", err) + return fmt.Errorf("failed to readAll: %w", err) } // if force migration is not required, we don't expect any items to be dropped (so there should be 2 items) if !writeOptionMigrateForce || supportsSafeMigrate { @@ -91,11 +90,15 @@ func (s *WriterTestSuite) testMigrate( t *testing.T, forceMigrate bool, ) { - t.Run("add_column", func(t *testing.T) { + suffix := "_safe" + if forceMigrate { + suffix = "_force" + } + t.Run("add_column"+suffix, func(t *testing.T) { if !forceMigrate && !s.tests.SafeMigrations.AddColumn { t.Skip("skipping test: add_column") } - tableName := "add_column_" + tableUUIDSuffix() + tableName := "add_column" + suffix + "_" + tableUUIDSuffix() source := &schema.Table{ Name: tableName, Columns: schema.ColumnList{ @@ -115,11 +118,11 @@ func (s *WriterTestSuite) testMigrate( } }) - t.Run("add_column_not_null", func(t *testing.T) { + t.Run("add_column_not_null"+suffix, func(t *testing.T) { if !forceMigrate && !s.tests.SafeMigrations.AddColumnNotNull { t.Skip("skipping test: add_column_not_null") } - tableName := "add_column_not_null_" + tableUUIDSuffix() + tableName := "add_column_not_null" + suffix + "_" + tableUUIDSuffix() source := &schema.Table{ Name: tableName, Columns: schema.ColumnList{ @@ -138,11 +141,11 @@ func (s *WriterTestSuite) testMigrate( } }) - t.Run("remove_column", func(t *testing.T) { + t.Run("remove_column"+suffix, func(t *testing.T) { if !forceMigrate && !s.tests.SafeMigrations.RemoveColumn { t.Skip("skipping test: remove_column") } - tableName := "remove_column_" + tableUUIDSuffix() + tableName := "remove_column" + suffix + "_" + tableUUIDSuffix() source := &schema.Table{ Name: tableName, Columns: schema.ColumnList{ @@ -159,11 +162,11 @@ func (s *WriterTestSuite) testMigrate( } }) - t.Run("remove_column_not_null", func(t *testing.T) { + t.Run("remove_column_not_null"+suffix, func(t *testing.T) { if !forceMigrate && !s.tests.SafeMigrations.RemoveColumnNotNull { t.Skip("skipping test: remove_column_not_null") } - tableName := "remove_column_not_null_" + tableUUIDSuffix() + tableName := "remove_column_not_null" + suffix + "_" + tableUUIDSuffix() source := &schema.Table{ Name: tableName, Columns: schema.ColumnList{ @@ -181,11 +184,11 @@ func (s *WriterTestSuite) testMigrate( } }) - t.Run("change_column", func(t *testing.T) { + t.Run("change_column"+suffix, func(t *testing.T) { if !forceMigrate && !s.tests.SafeMigrations.ChangeColumn { t.Skip("skipping test: change_column") } - tableName := "change_column_" + tableUUIDSuffix() + tableName := "change_column" + suffix + "_" + tableUUIDSuffix() source := &schema.Table{ Name: tableName, Columns: schema.ColumnList{ From 01213f001c80fc99f48c601b85b213a5347ff250 Mon Sep 17 00:00:00 2001 From: Herman Schaaf Date: Fri, 16 Jun 2023 11:28:24 +0100 Subject: [PATCH 084/125] Use only 8 chars of uuid --- plugin/testing_write_migrate.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/plugin/testing_write_migrate.go b/plugin/testing_write_migrate.go index 56aebe956a..1cb67f225c 100644 --- a/plugin/testing_write_migrate.go +++ b/plugin/testing_write_migrate.go @@ -14,7 +14,7 @@ import ( ) func tableUUIDSuffix() string { - return strings.ReplaceAll(uuid.NewString(), "-", "_") + return strings.ReplaceAll(uuid.NewString(), "-", "_")[:8] // use only first 8 chars } func (s *WriterTestSuite) migrate(ctx context.Context, target *schema.Table, source *schema.Table, supportsSafeMigrate bool, writeOptionMigrateForce bool) error { @@ -40,7 +40,7 @@ func (s *WriterTestSuite) migrate(ctx context.Context, target *schema.Table, sou if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageInsert{ Record: resource1, }); err != nil { - return fmt.Errorf("failed to insert record: %w", err) + return fmt.Errorf("failed to insert first record: %w", err) } records, err := s.plugin.readAll(ctx, source) @@ -62,7 +62,7 @@ func (s *WriterTestSuite) migrate(ctx context.Context, target *schema.Table, sou if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageInsert{ Record: resource2, }); err != nil { - return fmt.Errorf("failed to insert record: %w", err) + return fmt.Errorf("failed to insert second record: %w", err) } records, err = s.plugin.readAll(ctx, target) From ba4dd22de2b99e15a934a63079dc448781473da3 Mon Sep 17 00:00:00 2001 From: Herman Schaaf Date: Fri, 16 Jun 2023 13:33:24 +0100 Subject: [PATCH 085/125] Rename to MigrateTable --- internal/memdb/memdb.go | 2 +- .../servers/destination/v0/destinations.go | 4 +- .../servers/destination/v1/destinations.go | 4 +- internal/servers/plugin/v3/plugin.go | 12 +++--- internal/servers/plugin/v3/state.go | 4 +- plugin/messages.go | 8 ++-- plugin/plugin_test.go | 2 +- plugin/testing_upsert.go | 2 +- plugin/testing_write_delete.go | 2 +- plugin/testing_write_insert.go | 3 +- plugin/testing_write_migrate.go | 4 +- serve/plugin_test.go | 4 +- writers/batch.go | 42 +++++++++---------- writers/batch_test.go | 22 +++++----- writers/mixed_batch.go | 24 +++++------ writers/mixed_batch_test.go | 26 ++++++------ 16 files changed, 82 insertions(+), 83 deletions(-) diff --git a/internal/memdb/memdb.go b/internal/memdb/memdb.go index 1b3df0e47e..00c76b4ae4 100644 --- a/internal/memdb/memdb.go +++ b/internal/memdb/memdb.go @@ -155,7 +155,7 @@ func (c *client) Write(ctx context.Context, options plugin.WriteOptions, msgs <- c.memoryDBLock.Lock() switch msg := msg.(type) { - case *plugin.MessageCreateTable: + case *plugin.MessageMigrateTable: c.migrate(ctx, msg.Table) case *plugin.MessageDeleteStale: c.deleteStale(ctx, msg) diff --git a/internal/servers/destination/v0/destinations.go b/internal/servers/destination/v0/destinations.go index d8c5f85c9d..905897889f 100644 --- a/internal/servers/destination/v0/destinations.go +++ b/internal/servers/destination/v0/destinations.go @@ -70,7 +70,7 @@ func (s *Server) Migrate(ctx context.Context, req *pb.Migrate_Request) (*pb.Migr }, writeCh) }) for _, table := range tables { - writeCh <- &plugin.MessageCreateTable{ + writeCh <- &plugin.MessageMigrateTable{ Table: table, } } @@ -125,7 +125,7 @@ func (s *Server) Write2(msg pb.Destination_Write2Server) error { }) for _, table := range tables { - msgs <- &plugin.MessageCreateTable{ + msgs <- &plugin.MessageMigrateTable{ Table: table, } } diff --git a/internal/servers/destination/v1/destinations.go b/internal/servers/destination/v1/destinations.go index 50578b5a6f..f213747ea5 100644 --- a/internal/servers/destination/v1/destinations.go +++ b/internal/servers/destination/v1/destinations.go @@ -68,7 +68,7 @@ func (s *Server) Migrate(ctx context.Context, req *pb.Migrate_Request) (*pb.Migr }, writeCh) }) for _, table := range tables { - writeCh <- &plugin.MessageCreateTable{ + writeCh <- &plugin.MessageMigrateTable{ Table: table, } } @@ -121,7 +121,7 @@ func (s *Server) Write(msg pb.Destination_WriteServer) error { }) for _, table := range tables { - msgs <- &plugin.MessageCreateTable{ + msgs <- &plugin.MessageMigrateTable{ Table: table, } } diff --git a/internal/servers/plugin/v3/plugin.go b/internal/servers/plugin/v3/plugin.go index 314475e8fb..defbc83be1 100644 --- a/internal/servers/plugin/v3/plugin.go +++ b/internal/servers/plugin/v3/plugin.go @@ -108,10 +108,10 @@ func (s *Server) Sync(req *pb.Sync_Request, stream pb.Plugin_SyncServer) error { pbMsg := &pb.Sync_Response{} for msg := range msgs { switch m := msg.(type) { - case *plugin.MessageCreateTable: + case *plugin.MessageMigrateTable: m.Table.ToArrowSchema() - pbMsg.Message = &pb.Sync_Response_CreateTable{ - CreateTable: &pb.MessageCreateTable{ + pbMsg.Message = &pb.Sync_Response_MigrateTable{ + MigrateTable: &pb.MessageMigrateTable{ Table: nil, }, } @@ -195,13 +195,13 @@ func (s *Server) Write(msg pb.Plugin_WriteServer) error { var pluginMessage plugin.Message var pbMsgConvertErr error switch pbMsg := r.Message.(type) { - case *pb.Write_Request_CreateTable: - table, err := schema.NewTableFromBytes(pbMsg.CreateTable.Table) + case *pb.Write_Request_MigrateTable: + table, err := schema.NewTableFromBytes(pbMsg.MigrateTable.Table) if err != nil { pbMsgConvertErr = status.Errorf(codes.InvalidArgument, "failed to create table: %v", err) break } - pluginMessage = &plugin.MessageCreateTable{ + pluginMessage = &plugin.MessageMigrateTable{ Table: table, } case *pb.Write_Request_Insert: diff --git a/internal/servers/plugin/v3/state.go b/internal/servers/plugin/v3/state.go index 81fd753a5c..f7a9015433 100644 --- a/internal/servers/plugin/v3/state.go +++ b/internal/servers/plugin/v3/state.go @@ -77,8 +77,8 @@ func newStateClient(ctx context.Context, conn *grpc.ClientConn, spec *pbPlugin.S } if err := writeClient.Send(&pbPlugin.Write_Request{ - Message: &pbPlugin.Write_Request_CreateTable{ - CreateTable: &pbPlugin.MessageCreateTable{ + Message: &pbPlugin.Write_Request_MigrateTable{ + MigrateTable: &pbPlugin.MessageMigrateTable{ Table: tableBytes, }, }, diff --git a/plugin/messages.go b/plugin/messages.go index d566117e0f..dd5e91d0c7 100644 --- a/plugin/messages.go +++ b/plugin/messages.go @@ -11,11 +11,11 @@ type Message interface { GetTable() *schema.Table } -type MessageCreateTable struct { +type MessageMigrateTable struct { Table *schema.Table } -func (m MessageCreateTable) GetTable() *schema.Table { +func (m MessageMigrateTable) GetTable() *schema.Table { return m.Table } @@ -48,7 +48,7 @@ func (m MessageDeleteStale) GetTable() *schema.Table { type Messages []Message -type CreateTables []*MessageCreateTable +type MigrateTables []*MessageMigrateTable type Inserts []*MessageInsert @@ -63,7 +63,7 @@ func (messages Messages) InsertItems() int64 { return items } -func (m CreateTables) Exists(tableName string) bool { +func (m MigrateTables) Exists(tableName string) bool { for _, table := range m { if table.Table.Name == tableName { return true diff --git a/plugin/plugin_test.go b/plugin/plugin_test.go index c50456139c..b797e81691 100644 --- a/plugin/plugin_test.go +++ b/plugin/plugin_test.go @@ -61,7 +61,7 @@ func TestPluginSuccess(t *testing.T) { t.Fatal(err) } if err := p.WriteAll(ctx, WriteOptions{}, []Message{ - MessageCreateTable{}, + MessageMigrateTable{}, }); err != nil { t.Fatal(err) } diff --git a/plugin/testing_upsert.go b/plugin/testing_upsert.go index 3bacf4474a..268d65a45f 100644 --- a/plugin/testing_upsert.go +++ b/plugin/testing_upsert.go @@ -19,7 +19,7 @@ func (s *WriterTestSuite) testUpsert(ctx context.Context) error { {Name: "name", Type: arrow.BinaryTypes.String, PrimaryKey: true}, }, } - if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageCreateTable{ + if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageMigrateTable{ Table: table, }); err != nil { return fmt.Errorf("failed to create table: %w", err) diff --git a/plugin/testing_write_delete.go b/plugin/testing_write_delete.go index 86606b7286..69e6ec2e92 100644 --- a/plugin/testing_write_delete.go +++ b/plugin/testing_write_delete.go @@ -20,7 +20,7 @@ func (s *WriterTestSuite) testDeleteStale(ctx context.Context) error { schema.CqSyncTimeColumn, }, } - if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageCreateTable{ + if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageMigrateTable{ Table: table, }); err != nil { return fmt.Errorf("failed to create table: %w", err) diff --git a/plugin/testing_write_insert.go b/plugin/testing_write_insert.go index e1b2e08be6..f0cc3e8d83 100644 --- a/plugin/testing_write_insert.go +++ b/plugin/testing_write_insert.go @@ -27,7 +27,7 @@ func (s *WriterTestSuite) testInsert(ctx context.Context) error { {Name: "name", Type: arrow.BinaryTypes.String}, }, } - if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageCreateTable{ + if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageMigrateTable{ Table: table, }); err != nil { return fmt.Errorf("failed to create table: %w", err) @@ -47,7 +47,6 @@ func (s *WriterTestSuite) testInsert(ctx context.Context) error { if err != nil { return fmt.Errorf("failed to sync: %w", err) } - totalItems := TotalRows(readRecords) if totalItems != 1 { diff --git a/plugin/testing_write_migrate.go b/plugin/testing_write_migrate.go index 1cb67f225c..697d4cfda7 100644 --- a/plugin/testing_write_migrate.go +++ b/plugin/testing_write_migrate.go @@ -20,7 +20,7 @@ func tableUUIDSuffix() string { func (s *WriterTestSuite) migrate(ctx context.Context, target *schema.Table, source *schema.Table, supportsSafeMigrate bool, writeOptionMigrateForce bool) error { if err := s.plugin.writeOne(ctx, WriteOptions{ MigrateForce: writeOptionMigrateForce, - }, &MessageCreateTable{ + }, &MessageMigrateTable{ Table: source, }); err != nil { return fmt.Errorf("failed to create table: %w", err) @@ -52,7 +52,7 @@ func (s *WriterTestSuite) migrate(ctx context.Context, target *schema.Table, sou return fmt.Errorf("expected 1 item, got %d", totalItems) } - if err := s.plugin.writeOne(ctx, WriteOptions{MigrateForce: writeOptionMigrateForce}, &MessageCreateTable{ + if err := s.plugin.writeOne(ctx, WriteOptions{MigrateForce: writeOptionMigrateForce}, &MessageMigrateTable{ Table: target, }); err != nil { return fmt.Errorf("failed to create table: %w", err) diff --git a/serve/plugin_test.go b/serve/plugin_test.go index 161193744f..e61555a2fc 100644 --- a/serve/plugin_test.go +++ b/serve/plugin_test.go @@ -115,8 +115,8 @@ func TestPluginServe(t *testing.T) { } if err := writeClient.Send(&pb.Write_Request{ - Message: &pb.Write_Request_CreateTable{ - CreateTable: &pb.MessageCreateTable{ + Message: &pb.Write_Request_MigrateTable{ + MigrateTable: &pb.MessageMigrateTable{ Table: tableBytes, }, }, diff --git a/writers/batch.go b/writers/batch.go index 9ece93efc7..34f15c84f0 100644 --- a/writers/batch.go +++ b/writers/batch.go @@ -27,19 +27,19 @@ const ( ) type BatchWriterClient interface { - CreateTables(context.Context, []*plugin.MessageCreateTable) error + MigrateTables(context.Context, []*plugin.MessageMigrateTable) error WriteTableBatch(ctx context.Context, name string, upsert bool, msgs []*plugin.MessageInsert) error DeleteStale(context.Context, []*plugin.MessageDeleteStale) error } type BatchWriter struct { - client BatchWriterClient - semaphore *semaphore.Weighted - workers map[string]*worker - workersLock *sync.RWMutex - workersWaitGroup *sync.WaitGroup - createTableMessages []*plugin.MessageCreateTable - deleteStaleMessages []*plugin.MessageDeleteStale + client BatchWriterClient + semaphore *semaphore.Weighted + workers map[string]*worker + workersLock *sync.RWMutex + workersWaitGroup *sync.WaitGroup + migrateTableMessages []*plugin.MessageMigrateTable + deleteStaleMessages []*plugin.MessageDeleteStale logger zerolog.Logger batchTimeout time.Duration @@ -101,7 +101,7 @@ func NewBatchWriter(client BatchWriterClient, opts ...Option) (*BatchWriter, err for _, opt := range opts { opt(c) } - c.createTableMessages = make([]*plugin.MessageCreateTable, 0, c.batchSize) + c.migrateTableMessages = make([]*plugin.MessageMigrateTable, 0, c.batchSize) c.deleteStaleMessages = make([]*plugin.MessageDeleteStale, 0, c.batchSize) return c, nil } @@ -114,7 +114,7 @@ func (w *BatchWriter) Flush(ctx context.Context) error { <-done } w.workersLock.RUnlock() - w.flushCreateTables(ctx) + w.flushMigrateTables(ctx) w.flushDeleteStaleTables(ctx) return nil } @@ -217,11 +217,11 @@ func (*BatchWriter) removeDuplicatesByPK(table *schema.Table, resources []arrow. return res } -func (w *BatchWriter) flushCreateTables(ctx context.Context) error { - if err := w.client.CreateTables(ctx, w.createTableMessages); err != nil { +func (w *BatchWriter) flushMigrateTables(ctx context.Context) error { + if err := w.client.MigrateTables(ctx, w.migrateTableMessages); err != nil { return err } - w.createTableMessages = w.createTableMessages[:0] + w.migrateTableMessages = w.migrateTableMessages[:0] return nil } @@ -260,8 +260,8 @@ func (w *BatchWriter) Write(ctx context.Context, msgs <-chan plugin.Message) err for msg := range msgs { switch m := msg.(type) { case *plugin.MessageDeleteStale: - if len(w.createTableMessages) > 0 { - if err := w.flushCreateTables(ctx); err != nil { + if len(w.migrateTableMessages) > 0 { + if err := w.flushMigrateTables(ctx); err != nil { return err } } @@ -273,8 +273,8 @@ func (w *BatchWriter) Write(ctx context.Context, msgs <-chan plugin.Message) err } } case *plugin.MessageInsert: - if len(w.createTableMessages) > 0 { - if err := w.flushCreateTables(ctx); err != nil { + if len(w.migrateTableMessages) > 0 { + if err := w.flushMigrateTables(ctx); err != nil { return err } } @@ -286,16 +286,16 @@ func (w *BatchWriter) Write(ctx context.Context, msgs <-chan plugin.Message) err if err := w.startWorker(ctx, m); err != nil { return err } - case *plugin.MessageCreateTable: + case *plugin.MessageMigrateTable: w.flushInsert(ctx, m.Table.Name) if len(w.deleteStaleMessages) > 0 { if err := w.flushDeleteStaleTables(ctx); err != nil { return err } } - w.createTableMessages = append(w.createTableMessages, m) - if len(w.createTableMessages) > w.batchSize { - if err := w.flushCreateTables(ctx); err != nil { + w.migrateTableMessages = append(w.migrateTableMessages, m) + if len(w.migrateTableMessages) > w.batchSize { + if err := w.flushMigrateTables(ctx); err != nil { return err } } diff --git a/writers/batch_test.go b/writers/batch_test.go index cb51311aeb..deef3bc627 100644 --- a/writers/batch_test.go +++ b/writers/batch_test.go @@ -13,13 +13,13 @@ import ( ) type testBatchClient struct { - createTables []*plugin.MessageCreateTable - inserts []*plugin.MessageInsert - deleteStales []*plugin.MessageDeleteStale + migrateTables []*plugin.MessageMigrateTable + inserts []*plugin.MessageInsert + deleteStales []*plugin.MessageDeleteStale } -func (c *testBatchClient) CreateTables(_ context.Context, msgs []*plugin.MessageCreateTable) error { - c.createTables = append(c.createTables, msgs...) +func (c *testBatchClient) MigrateTables(_ context.Context, msgs []*plugin.MessageMigrateTable) error { + c.migrateTables = append(c.migrateTables, msgs...) return nil } @@ -67,24 +67,24 @@ func TestBatchFlushDifferentMessages(t *testing.T) { bldr := array.NewRecordBuilder(memory.DefaultAllocator, batchTestTables[0].ToArrowSchema()) bldr.Field(0).(*array.Int64Builder).Append(1) record := bldr.NewRecord() - if err := wr.writeAll(ctx, []plugin.Message{&plugin.MessageCreateTable{Table: batchTestTables[0]}}); err != nil { + if err := wr.writeAll(ctx, []plugin.Message{&plugin.MessageMigrateTable{Table: batchTestTables[0]}}); err != nil { t.Fatal(err) } - if len(testClient.createTables) != 0 { - t.Fatalf("expected 0 create table messages, got %d", len(testClient.createTables)) + if len(testClient.migrateTables) != 0 { + t.Fatalf("expected 0 create table messages, got %d", len(testClient.migrateTables)) } if err := wr.writeAll(ctx, []plugin.Message{&plugin.MessageInsert{Record: record}}); err != nil { t.Fatal(err) } - if len(testClient.createTables) != 1 { - t.Fatalf("expected 1 create table messages, got %d", len(testClient.createTables)) + if len(testClient.migrateTables) != 1 { + t.Fatalf("expected 1 create table messages, got %d", len(testClient.migrateTables)) } if len(testClient.inserts) != 0 { t.Fatalf("expected 0 insert messages, got %d", len(testClient.inserts)) } - if err := wr.writeAll(ctx, []plugin.Message{&plugin.MessageCreateTable{Table: batchTestTables[0]}}); err != nil { + if err := wr.writeAll(ctx, []plugin.Message{&plugin.MessageMigrateTable{Table: batchTestTables[0]}}); err != nil { t.Fatal(err) } diff --git a/writers/mixed_batch.go b/writers/mixed_batch.go index ae0ad509d9..998a9d7c13 100644 --- a/writers/mixed_batch.go +++ b/writers/mixed_batch.go @@ -12,16 +12,16 @@ import ( ) const ( - msgTypeCreateTable = iota + msgTypeMigrateTable = iota msgTypeInsert msgTypeDeleteStale ) -var allMsgTypes = []int{msgTypeCreateTable, msgTypeInsert, msgTypeDeleteStale} +var allMsgTypes = []int{msgTypeMigrateTable, msgTypeInsert, msgTypeDeleteStale} // MixedBatchClient is a client that will receive batches of messages with a mixture of tables. type MixedBatchClient interface { - CreateTableBatch(ctx context.Context, messages []*plugin.MessageCreateTable, options plugin.WriteOptions) error + MigrateTableBatch(ctx context.Context, messages []*plugin.MessageMigrateTable, options plugin.WriteOptions) error InsertBatch(ctx context.Context, messages []*plugin.MessageInsert, options plugin.WriteOptions) error DeleteStaleBatch(ctx context.Context, messages []*plugin.MessageDeleteStale, options plugin.WriteOptions) error } @@ -80,8 +80,8 @@ func NewMixedBatchWriter(client MixedBatchClient, opts ...MixedBatchWriterOption func msgID(msg plugin.Message) int { switch msg.(type) { - case plugin.MessageCreateTable, *plugin.MessageCreateTable: - return msgTypeCreateTable + case plugin.MessageMigrateTable, *plugin.MessageMigrateTable: + return msgTypeMigrateTable case plugin.MessageInsert, *plugin.MessageInsert: return msgTypeInsert case plugin.MessageDeleteStale, *plugin.MessageDeleteStale: @@ -92,9 +92,9 @@ func msgID(msg plugin.Message) int { // Write starts listening for messages on the msgChan channel and writes them to the client in batches. func (w *MixedBatchWriter) Write(ctx context.Context, options plugin.WriteOptions, msgChan <-chan plugin.Message) error { - createTable := &batchManager[*plugin.MessageCreateTable]{ - batch: make([]*plugin.MessageCreateTable, 0, w.batchSize), - writeFunc: w.client.CreateTableBatch, + migrateTable := &batchManager[*plugin.MessageMigrateTable]{ + batch: make([]*plugin.MessageMigrateTable, 0, w.batchSize), + writeFunc: w.client.MigrateTableBatch, writeOptions: options, } insert := &insertBatchManager{ @@ -110,8 +110,8 @@ func (w *MixedBatchWriter) Write(ctx context.Context, options plugin.WriteOption } flush := func(msgType int) error { switch msgType { - case msgTypeCreateTable: - return createTable.flush(ctx) + case msgTypeMigrateTable: + return migrateTable.flush(ctx) case msgTypeInsert: return insert.flush(ctx) case msgTypeDeleteStale: @@ -131,8 +131,8 @@ func (w *MixedBatchWriter) Write(ctx context.Context, options plugin.WriteOption } prevMsgType = msgType switch v := msg.(type) { - case *plugin.MessageCreateTable: - err = createTable.append(ctx, v) + case *plugin.MessageMigrateTable: + err = migrateTable.append(ctx, v) case *plugin.MessageInsert: err = insert.append(ctx, v) case *plugin.MessageDeleteStale: diff --git a/writers/mixed_batch_test.go b/writers/mixed_batch_test.go index 610f0bff70..8b952356e2 100644 --- a/writers/mixed_batch_test.go +++ b/writers/mixed_batch_test.go @@ -16,7 +16,7 @@ type testMixedBatchClient struct { receivedBatches [][]plugin.Message } -func (c *testMixedBatchClient) CreateTableBatch(ctx context.Context, msgs []*plugin.MessageCreateTable, options plugin.WriteOptions) error { +func (c *testMixedBatchClient) MigrateTableBatch(ctx context.Context, msgs []*plugin.MessageMigrateTable, options plugin.WriteOptions) error { m := make([]plugin.Message, len(msgs)) for i, msg := range msgs { m[i] = msg @@ -58,7 +58,7 @@ func TestMixedBatchWriter(t *testing.T) { }, }, } - msgCreateTable1 := &plugin.MessageCreateTable{ + msgMigrateTable1 := &plugin.MessageMigrateTable{ Table: table1, } @@ -72,7 +72,7 @@ func TestMixedBatchWriter(t *testing.T) { }, }, } - msgCreateTable2 := &plugin.MessageCreateTable{ + msgMigrateTable2 := &plugin.MessageMigrateTable{ Table: table2, } @@ -113,15 +113,15 @@ func TestMixedBatchWriter(t *testing.T) { { name: "create table, insert, delete stale", messages: []plugin.Message{ - msgCreateTable1, - msgCreateTable2, + msgMigrateTable1, + msgMigrateTable2, msgInsertTable1, msgInsertTable2, msgDeleteStale1, msgDeleteStale2, }, wantBatches: [][]plugin.Message{ - {msgCreateTable1, msgCreateTable2}, + {msgMigrateTable1, msgMigrateTable2}, {msgInsertTable1, msgInsertTable2}, {msgDeleteStale1, msgDeleteStale2}, }, @@ -129,18 +129,18 @@ func TestMixedBatchWriter(t *testing.T) { { name: "interleaved messages", messages: []plugin.Message{ - msgCreateTable1, + msgMigrateTable1, msgInsertTable1, msgDeleteStale1, - msgCreateTable2, + msgMigrateTable2, msgInsertTable2, msgDeleteStale2, }, wantBatches: [][]plugin.Message{ - {msgCreateTable1}, + {msgMigrateTable1}, {msgInsertTable1}, {msgDeleteStale1}, - {msgCreateTable2}, + {msgMigrateTable2}, {msgInsertTable2}, {msgDeleteStale2}, }, @@ -148,15 +148,15 @@ func TestMixedBatchWriter(t *testing.T) { { name: "interleaved messages", messages: []plugin.Message{ - msgCreateTable1, - msgCreateTable2, + msgMigrateTable1, + msgMigrateTable2, msgInsertTable1, msgDeleteStale2, msgInsertTable2, msgDeleteStale1, }, wantBatches: [][]plugin.Message{ - {msgCreateTable1, msgCreateTable2}, + {msgMigrateTable1, msgMigrateTable2}, {msgInsertTable1}, {msgDeleteStale2}, {msgInsertTable2}, From 71e56daebd81ad9c9862d8e55cda05cb3c8a689f Mon Sep 17 00:00:00 2001 From: Herman Schaaf Date: Fri, 16 Jun 2023 16:18:36 +0100 Subject: [PATCH 086/125] Move messages package and many other changes --- internal/memdb/memdb.go | 21 +++++--- .../servers/destination/v0/destinations.go | 15 +++--- .../servers/destination/v1/destinations.go | 15 +++--- internal/servers/plugin/v3/plugin.go | 26 ++++++---- plugin/messages.go => message/message.go | 22 ++++----- plugin/plugin.go | 10 +++- plugin/plugin_destination.go | 16 +++--- plugin/plugin_source.go | 20 ++++---- plugin/plugin_test.go | 15 ++++-- ...testing_sync.go.backup => testing_sync.go} | 37 ++++++++------ plugin/testing_upsert.go | 7 +-- plugin/testing_write.go | 6 +-- plugin/testing_write_delete.go | 7 +-- plugin/testing_write_insert.go | 7 +-- plugin/testing_write_migrate.go | 9 ++-- scheduler/scheduler.go | 12 +++-- scheduler/scheduler_test.go | 25 ++++++---- serve/destination_v0_test.go.backup | 2 +- serve/destination_v1_test.go.backup | 2 +- writers/batch.go | 49 ++++++++++--------- writers/batch_test.go | 30 ++++++------ writers/mixed_batch.go | 41 ++++++++-------- writers/mixed_batch_test.go | 47 +++++++++--------- 23 files changed, 246 insertions(+), 195 deletions(-) rename plugin/messages.go => message/message.go (73%) rename plugin/{testing_sync.go.backup => testing_sync.go} (78%) diff --git a/internal/memdb/memdb.go b/internal/memdb/memdb.go index 00c76b4ae4..4c273ad3a4 100644 --- a/internal/memdb/memdb.go +++ b/internal/memdb/memdb.go @@ -7,6 +7,7 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/array" + "github.com/cloudquery/plugin-sdk/v4/message" "github.com/cloudquery/plugin-sdk/v4/plugin" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/rs/zerolog" @@ -84,6 +85,10 @@ func (c *client) ID() string { return "testDestinationMemDB" } +func (c *client) GetSpec() any { + return &struct{}{} +} + func (c *client) Read(ctx context.Context, table *schema.Table, res chan<- arrow.Record) error { c.memoryDBLock.RLock() defer c.memoryDBLock.RUnlock() @@ -95,15 +100,15 @@ func (c *client) Read(ctx context.Context, table *schema.Table, res chan<- arrow return nil } -func (c *client) Sync(ctx context.Context, options plugin.SyncOptions, res chan<- plugin.Message) error { +func (c *client) Sync(ctx context.Context, options plugin.SyncOptions, res chan<- message.Message) error { c.memoryDBLock.RLock() for tableName := range c.memoryDB { - if !plugin.IsTable(tableName, options.Tables, options.SkipTables) { + if !plugin.MatchesTable(tableName, options.Tables, options.SkipTables) { continue } for _, row := range c.memoryDB[tableName] { - res <- &plugin.MessageInsert{ + res <- &message.Insert{ Record: row, Upsert: false, } @@ -139,7 +144,7 @@ func (c *client) migrate(_ context.Context, table *schema.Table) { c.tables[tableName] = table } -func (c *client) Write(ctx context.Context, options plugin.WriteOptions, msgs <-chan plugin.Message) error { +func (c *client) Write(ctx context.Context, options plugin.WriteOptions, msgs <-chan message.Message) error { if c.errOnWrite { return fmt.Errorf("errOnWrite") } @@ -155,11 +160,11 @@ func (c *client) Write(ctx context.Context, options plugin.WriteOptions, msgs <- c.memoryDBLock.Lock() switch msg := msg.(type) { - case *plugin.MessageMigrateTable: + case *message.MigrateTable: c.migrate(ctx, msg.Table) - case *plugin.MessageDeleteStale: + case *message.DeleteStale: c.deleteStale(ctx, msg) - case *plugin.MessageInsert: + case *message.Insert: sc := msg.Record.Schema() tableName, ok := sc.Metadata().GetValue(schema.MetadataTableName) if !ok { @@ -183,7 +188,7 @@ func (c *client) Close(context.Context) error { return nil } -func (c *client) deleteStale(_ context.Context, msg *plugin.MessageDeleteStale) { +func (c *client) deleteStale(_ context.Context, msg *message.DeleteStale) { var filteredTable []arrow.Record tableName := msg.Table.Name for i, row := range c.memoryDB[tableName] { diff --git a/internal/servers/destination/v0/destinations.go b/internal/servers/destination/v0/destinations.go index 905897889f..af89f7de26 100644 --- a/internal/servers/destination/v0/destinations.go +++ b/internal/servers/destination/v0/destinations.go @@ -12,6 +12,7 @@ import ( pb "github.com/cloudquery/plugin-pb-go/pb/destination/v0" "github.com/cloudquery/plugin-pb-go/specs/v0" schemav2 "github.com/cloudquery/plugin-sdk/v2/schema" + "github.com/cloudquery/plugin-sdk/v4/message" "github.com/cloudquery/plugin-sdk/v4/plugin" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/rs/zerolog" @@ -62,7 +63,7 @@ func (s *Server) Migrate(ctx context.Context, req *pb.Migrate_Request) (*pb.Migr tables := TablesV2ToV3(tablesV2).FlattenTables() SetDestinationManagedCqColumns(tables) s.setPKsForTables(tables) - writeCh := make(chan plugin.Message) + writeCh := make(chan message.Message) eg, ctx := errgroup.WithContext(ctx) eg.Go(func() error { return s.Plugin.Write(ctx, plugin.WriteOptions{ @@ -70,7 +71,7 @@ func (s *Server) Migrate(ctx context.Context, req *pb.Migrate_Request) (*pb.Migr }, writeCh) }) for _, table := range tables { - writeCh <- &plugin.MessageMigrateTable{ + writeCh <- &message.MigrateTable{ Table: table, } } @@ -88,7 +89,7 @@ func (*Server) Write(pb.Destination_WriteServer) error { // Note the order of operations in this method is important! // Trying to insert into the `resources` channel before starting the reader goroutine will cause a deadlock. func (s *Server) Write2(msg pb.Destination_Write2Server) error { - msgs := make(chan plugin.Message) + msgs := make(chan message.Message) r, err := msg.Recv() if err != nil { @@ -125,7 +126,7 @@ func (s *Server) Write2(msg pb.Destination_Write2Server) error { }) for _, table := range tables { - msgs <- &plugin.MessageMigrateTable{ + msgs <- &message.MigrateTable{ Table: table, } } @@ -175,7 +176,7 @@ func (s *Server) Write2(msg pb.Destination_Write2Server) error { origResource.Data = append([]schemav2.CQType{sourceColumn, syncTimeColumn}, origResource.Data...) } convertedResource := CQTypesToRecord(memory.DefaultAllocator, []schemav2.CQTypes{origResource.Data}, table.ToArrowSchema()) - msg := &plugin.MessageInsert{ + msg := &message.Insert{ Record: convertedResource, Upsert: s.spec.WriteMode == specs.WriteModeOverwrite || s.spec.WriteMode == specs.WriteModeOverwriteDeleteStale, } @@ -235,7 +236,7 @@ func (s *Server) DeleteStale(ctx context.Context, req *pb.DeleteStale_Request) ( tables := TablesV2ToV3(tablesV2).FlattenTables() SetDestinationManagedCqColumns(tables) - msgs := make(chan plugin.Message) + msgs := make(chan message.Message) var writeErr error var wg sync.WaitGroup wg.Add(1) @@ -247,7 +248,7 @@ func (s *Server) DeleteStale(ctx context.Context, req *pb.DeleteStale_Request) ( bldr := array.NewRecordBuilder(memory.DefaultAllocator, table.ToArrowSchema()) bldr.Field(table.Columns.Index(schema.CqSourceNameColumn.Name)).(*array.StringBuilder).Append(req.Source) bldr.Field(table.Columns.Index(schema.CqSyncTimeColumn.Name)).(*array.TimestampBuilder).AppendTime(req.Timestamp.AsTime()) - msgs <- &plugin.MessageDeleteStale{ + msgs <- &message.DeleteStale{ Table: table, SourceName: req.Source, SyncTime: req.Timestamp.AsTime(), diff --git a/internal/servers/destination/v1/destinations.go b/internal/servers/destination/v1/destinations.go index f213747ea5..b3534fc56a 100644 --- a/internal/servers/destination/v1/destinations.go +++ b/internal/servers/destination/v1/destinations.go @@ -12,6 +12,7 @@ import ( "github.com/apache/arrow/go/v13/arrow/memory" pb "github.com/cloudquery/plugin-pb-go/pb/destination/v1" "github.com/cloudquery/plugin-pb-go/specs/v0" + "github.com/cloudquery/plugin-sdk/v4/message" "github.com/cloudquery/plugin-sdk/v4/plugin" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/rs/zerolog" @@ -60,7 +61,7 @@ func (s *Server) Migrate(ctx context.Context, req *pb.Migrate_Request) (*pb.Migr } s.setPKsForTables(tables) - writeCh := make(chan plugin.Message) + writeCh := make(chan message.Message) eg, ctx := errgroup.WithContext(ctx) eg.Go(func() error { return s.Plugin.Write(ctx, plugin.WriteOptions{ @@ -68,7 +69,7 @@ func (s *Server) Migrate(ctx context.Context, req *pb.Migrate_Request) (*pb.Migr }, writeCh) }) for _, table := range tables { - writeCh <- &plugin.MessageMigrateTable{ + writeCh <- &message.MigrateTable{ Table: table, } } @@ -82,7 +83,7 @@ func (s *Server) Migrate(ctx context.Context, req *pb.Migrate_Request) (*pb.Migr // Note the order of operations in this method is important! // Trying to insert into the `resources` channel before starting the reader goroutine will cause a deadlock. func (s *Server) Write(msg pb.Destination_WriteServer) error { - msgs := make(chan plugin.Message) + msgs := make(chan message.Message) r, err := msg.Recv() if err != nil { @@ -121,7 +122,7 @@ func (s *Server) Write(msg pb.Destination_WriteServer) error { }) for _, table := range tables { - msgs <- &plugin.MessageMigrateTable{ + msgs <- &message.MigrateTable{ Table: table, } } @@ -153,7 +154,7 @@ func (s *Server) Write(msg pb.Destination_WriteServer) error { for rdr.Next() { rec := rdr.Record() rec.Retain() - msg := &plugin.MessageInsert{ + msg := &message.Insert{ Record: rec, Upsert: s.spec.WriteMode == specs.WriteModeOverwrite || s.spec.WriteMode == specs.WriteModeOverwriteDeleteStale, } @@ -203,7 +204,7 @@ func (s *Server) DeleteStale(ctx context.Context, req *pb.DeleteStale_Request) ( return nil, status.Errorf(codes.InvalidArgument, "failed to create tables: %v", err) } - msgs := make(chan plugin.Message) + msgs := make(chan message.Message) var writeErr error var wg sync.WaitGroup wg.Add(1) @@ -215,7 +216,7 @@ func (s *Server) DeleteStale(ctx context.Context, req *pb.DeleteStale_Request) ( bldr := array.NewRecordBuilder(memory.DefaultAllocator, table.ToArrowSchema()) bldr.Field(table.Columns.Index(schema.CqSourceNameColumn.Name)).(*array.StringBuilder).Append(req.Source) bldr.Field(table.Columns.Index(schema.CqSyncTimeColumn.Name)).(*array.TimestampBuilder).AppendTime(req.Timestamp.AsTime()) - msgs <- &plugin.MessageDeleteStale{ + msgs <- &message.DeleteStale{ Table: table, SourceName: req.Source, SyncTime: req.Timestamp.AsTime(), diff --git a/internal/servers/plugin/v3/plugin.go b/internal/servers/plugin/v3/plugin.go index defbc83be1..2a0b3ce429 100644 --- a/internal/servers/plugin/v3/plugin.go +++ b/internal/servers/plugin/v3/plugin.go @@ -2,6 +2,7 @@ package plugin import ( "context" + "encoding/json" "errors" "fmt" "io" @@ -9,6 +10,7 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/cloudquery/plugin-pb-go/managedplugin" pb "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" + "github.com/cloudquery/plugin-sdk/v4/message" "github.com/cloudquery/plugin-sdk/v4/plugin" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/getsentry/sentry-go" @@ -57,14 +59,18 @@ func (s *Server) GetVersion(context.Context, *pb.GetVersion_Request) (*pb.GetVer } func (s *Server) Init(ctx context.Context, req *pb.Init_Request) (*pb.Init_Response, error) { - if err := s.Plugin.Init(ctx, req.Spec); err != nil { + pluginSpec := s.Plugin.GetSpec() + if err := json.Unmarshal(req.GetSpec(), &pluginSpec); err != nil { + return nil, status.Errorf(codes.InvalidArgument, "failed to unmarshal plugin spec: %v", err) + } + if err := s.Plugin.Init(ctx, pluginSpec); err != nil { return nil, status.Errorf(codes.Internal, "failed to init plugin: %v", err) } return &pb.Init_Response{}, nil } func (s *Server) Sync(req *pb.Sync_Request, stream pb.Plugin_SyncServer) error { - msgs := make(chan plugin.Message) + msgs := make(chan message.Message) var syncErr error ctx := stream.Context() @@ -108,14 +114,14 @@ func (s *Server) Sync(req *pb.Sync_Request, stream pb.Plugin_SyncServer) error { pbMsg := &pb.Sync_Response{} for msg := range msgs { switch m := msg.(type) { - case *plugin.MessageMigrateTable: + case *message.MigrateTable: m.Table.ToArrowSchema() pbMsg.Message = &pb.Sync_Response_MigrateTable{ MigrateTable: &pb.MessageMigrateTable{ Table: nil, }, } - case *plugin.MessageInsert: + case *message.Insert: recordBytes, err := schema.RecordToBytes(m.Record) if err != nil { return status.Errorf(codes.Internal, "failed to encode record: %v", err) @@ -126,7 +132,7 @@ func (s *Server) Sync(req *pb.Sync_Request, stream pb.Plugin_SyncServer) error { Upsert: m.Upsert, }, } - case *plugin.MessageDeleteStale: + case *message.DeleteStale: tableBytes, err := m.Table.ToArrowSchemaBytes() if err != nil { return status.Errorf(codes.Internal, "failed to encode record: %v", err) @@ -160,7 +166,7 @@ func (s *Server) Sync(req *pb.Sync_Request, stream pb.Plugin_SyncServer) error { } func (s *Server) Write(msg pb.Plugin_WriteServer) error { - msgs := make(chan plugin.Message) + msgs := make(chan message.Message) r, err := msg.Recv() if err != nil { return status.Errorf(codes.Internal, "failed to receive msg: %v", err) @@ -192,7 +198,7 @@ func (s *Server) Write(msg pb.Plugin_WriteServer) error { } return status.Errorf(codes.Internal, "failed to receive msg: %v", err) } - var pluginMessage plugin.Message + var pluginMessage message.Message var pbMsgConvertErr error switch pbMsg := r.Message.(type) { case *pb.Write_Request_MigrateTable: @@ -201,7 +207,7 @@ func (s *Server) Write(msg pb.Plugin_WriteServer) error { pbMsgConvertErr = status.Errorf(codes.InvalidArgument, "failed to create table: %v", err) break } - pluginMessage = &plugin.MessageMigrateTable{ + pluginMessage = &message.MigrateTable{ Table: table, } case *pb.Write_Request_Insert: @@ -210,7 +216,7 @@ func (s *Server) Write(msg pb.Plugin_WriteServer) error { pbMsgConvertErr = status.Errorf(codes.InvalidArgument, "failed to create record: %v", err) break } - pluginMessage = &plugin.MessageInsert{ + pluginMessage = &message.Insert{ Record: record, Upsert: pbMsg.Insert.Upsert, } @@ -220,7 +226,7 @@ func (s *Server) Write(msg pb.Plugin_WriteServer) error { pbMsgConvertErr = status.Errorf(codes.InvalidArgument, "failed to create record: %v", err) break } - pluginMessage = &plugin.MessageDeleteStale{ + pluginMessage = &message.DeleteStale{ Table: table, SourceName: pbMsg.Delete.SourceName, SyncTime: pbMsg.Delete.SyncTime.AsTime(), diff --git a/plugin/messages.go b/message/message.go similarity index 73% rename from plugin/messages.go rename to message/message.go index dd5e91d0c7..f30f5a4308 100644 --- a/plugin/messages.go +++ b/message/message.go @@ -1,4 +1,4 @@ -package plugin +package message import ( "time" @@ -11,20 +11,20 @@ type Message interface { GetTable() *schema.Table } -type MessageMigrateTable struct { +type MigrateTable struct { Table *schema.Table } -func (m MessageMigrateTable) GetTable() *schema.Table { +func (m MigrateTable) GetTable() *schema.Table { return m.Table } -type MessageInsert struct { +type Insert struct { Record arrow.Record Upsert bool } -func (m MessageInsert) GetTable() *schema.Table { +func (m Insert) GetTable() *schema.Table { table, err := schema.NewTableFromArrowSchema(m.Record.Schema()) if err != nil { panic(err) @@ -32,31 +32,31 @@ func (m MessageInsert) GetTable() *schema.Table { return table } -// MessageDeleteStale is a pretty specific message which requires the destination to be aware of a CLI use-case +// DeleteStale is a pretty specific message which requires the destination to be aware of a CLI use-case // thus it might be deprecated in the future // in favour of MessageDelete or MessageRawQuery // The message indeciates that the destination needs to run something like "DELETE FROM table WHERE _cq_source_name=$1 and sync_time < $2" -type MessageDeleteStale struct { +type DeleteStale struct { Table *schema.Table SourceName string SyncTime time.Time } -func (m MessageDeleteStale) GetTable() *schema.Table { +func (m DeleteStale) GetTable() *schema.Table { return m.Table } type Messages []Message -type MigrateTables []*MessageMigrateTable +type MigrateTables []*MigrateTable -type Inserts []*MessageInsert +type Inserts []*Insert func (messages Messages) InsertItems() int64 { items := int64(0) for _, msg := range messages { switch m := msg.(type) { - case *MessageInsert: + case *Insert: items += m.Record.NumRows() } } diff --git a/plugin/plugin.go b/plugin/plugin.go index ab79cc633a..d327ee8b87 100644 --- a/plugin/plugin.go +++ b/plugin/plugin.go @@ -6,6 +6,7 @@ import ( "sync" "github.com/apache/arrow/go/v13/arrow" + "github.com/cloudquery/plugin-sdk/v4/message" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/rs/zerolog" ) @@ -21,7 +22,7 @@ type Client interface { type UnimplementedDestination struct{} -func (UnimplementedDestination) Write(ctx context.Context, options WriteOptions, res <-chan Message) error { +func (UnimplementedDestination) Write(ctx context.Context, options WriteOptions, res <-chan message.Message) error { return ErrNotImplemented } @@ -31,7 +32,7 @@ func (UnimplementedDestination) Read(ctx context.Context, table *schema.Table, r type UnimplementedSource struct{} -func (UnimplementedSource) Sync(ctx context.Context, options SyncOptions, res chan<- Message) error { +func (UnimplementedSource) Sync(ctx context.Context, options SyncOptions, res chan<- message.Message) error { return ErrNotImplemented } @@ -119,6 +120,11 @@ func (p *Plugin) Tables(ctx context.Context) (schema.Tables, error) { return tables, nil } +// GetSpec returns an empty struct to be filled with the plugin's configuration. +func (p *Plugin) GetSpec() any { + return p.client.GetSpec() +} + // Init initializes the plugin with the given spec. func (p *Plugin) Init(ctx context.Context, spec any) error { if !p.mu.TryLock() { diff --git a/plugin/plugin_destination.go b/plugin/plugin_destination.go index 123bdf717f..2904b2a9a1 100644 --- a/plugin/plugin_destination.go +++ b/plugin/plugin_destination.go @@ -4,6 +4,7 @@ import ( "context" "github.com/apache/arrow/go/v13/arrow" + "github.com/cloudquery/plugin-sdk/v4/message" "github.com/cloudquery/plugin-sdk/v4/schema" ) @@ -12,20 +13,21 @@ type WriteOptions struct { } type DestinationClient interface { - Read(ctx context.Context, table *schema.Table, res chan<- arrow.Record) error - Write(ctx context.Context, options WriteOptions, res <-chan Message) error + GetSpec() any Close(ctx context.Context) error + Read(ctx context.Context, table *schema.Table, res chan<- arrow.Record) error + Write(ctx context.Context, options WriteOptions, res <-chan message.Message) error } // writeOne is currently used mostly for testing, so it's not a public api -func (p *Plugin) writeOne(ctx context.Context, options WriteOptions, resource Message) error { - resources := []Message{resource} +func (p *Plugin) writeOne(ctx context.Context, options WriteOptions, resource message.Message) error { + resources := []message.Message{resource} return p.WriteAll(ctx, options, resources) } // WriteAll is currently used mostly for testing, so it's not a public api -func (p *Plugin) WriteAll(ctx context.Context, options WriteOptions, resources []Message) error { - ch := make(chan Message, len(resources)) +func (p *Plugin) WriteAll(ctx context.Context, options WriteOptions, resources []message.Message) error { + ch := make(chan message.Message, len(resources)) for _, resource := range resources { ch <- resource } @@ -33,7 +35,7 @@ func (p *Plugin) WriteAll(ctx context.Context, options WriteOptions, resources [ return p.Write(ctx, options, ch) } -func (p *Plugin) Write(ctx context.Context, options WriteOptions, res <-chan Message) error { +func (p *Plugin) Write(ctx context.Context, options WriteOptions, res <-chan message.Message) error { if err := p.client.Write(ctx, options, res); err != nil { return err } diff --git a/plugin/plugin_source.go b/plugin/plugin_source.go index 52d41243c4..c6d6089751 100644 --- a/plugin/plugin_source.go +++ b/plugin/plugin_source.go @@ -6,6 +6,7 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/cloudquery/plugin-sdk/v4/glob" + "github.com/cloudquery/plugin-sdk/v4/message" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/cloudquery/plugin-sdk/v4/state" "github.com/rs/zerolog" @@ -20,12 +21,13 @@ type SyncOptions struct { } type SourceClient interface { - Tables(ctx context.Context) (schema.Tables, error) - Sync(ctx context.Context, options SyncOptions, res chan<- Message) error + GetSpec() any Close(ctx context.Context) error + Tables(ctx context.Context) (schema.Tables, error) + Sync(ctx context.Context, options SyncOptions, res chan<- message.Message) error } -func IsTable(name string, includeTablesPattern []string, skipTablesPattern []string) bool { +func MatchesTable(name string, includeTablesPattern []string, skipTablesPattern []string) bool { for _, pattern := range skipTablesPattern { if glob.Glob(pattern, name) { return false @@ -39,11 +41,11 @@ func IsTable(name string, includeTablesPattern []string, skipTablesPattern []str return false } -type NewReadOnlyClientFunc func(context.Context, zerolog.Logger, any) (SourceClient, error) +type NewSourceClientFunc func(context.Context, zerolog.Logger, any) (SourceClient, error) // NewSourcePlugin returns a new CloudQuery Plugin with the given name, version and implementation. // Source plugins only support read operations. For Read & Write plugin use NewPlugin. -func NewSourcePlugin(name string, version string, newClient NewReadOnlyClientFunc, options ...Option) *Plugin { +func NewSourcePlugin(name string, version string, newClient NewSourceClientFunc, options ...Option) *Plugin { newClientWrapper := func(ctx context.Context, logger zerolog.Logger, any any) (Client, error) { sourceClient, err := newClient(ctx, logger, any) if err != nil { @@ -75,15 +77,15 @@ func (p *Plugin) readAll(ctx context.Context, table *schema.Table) ([]arrow.Reco return records, err } -func (p *Plugin) SyncAll(ctx context.Context, options SyncOptions) (Messages, error) { +func (p *Plugin) SyncAll(ctx context.Context, options SyncOptions) (message.Messages, error) { var err error - ch := make(chan Message) + ch := make(chan message.Message) go func() { defer close(ch) err = p.Sync(ctx, options, ch) }() // nolint:prealloc - var resources []Message + var resources []message.Message for resource := range ch { resources = append(resources, resource) } @@ -91,7 +93,7 @@ func (p *Plugin) SyncAll(ctx context.Context, options SyncOptions) (Messages, er } // Sync is syncing data from the requested tables in spec to the given channel -func (p *Plugin) Sync(ctx context.Context, options SyncOptions, res chan<- Message) error { +func (p *Plugin) Sync(ctx context.Context, options SyncOptions, res chan<- message.Message) error { if !p.mu.TryLock() { return fmt.Errorf("plugin already in use") } diff --git a/plugin/plugin_test.go b/plugin/plugin_test.go index b797e81691..b96d9fc657 100644 --- a/plugin/plugin_test.go +++ b/plugin/plugin_test.go @@ -5,6 +5,7 @@ import ( "testing" "github.com/apache/arrow/go/v13/arrow" + "github.com/cloudquery/plugin-sdk/v4/message" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/rs/zerolog" ) @@ -13,13 +14,17 @@ type testPluginSpec struct { } type testPluginClient struct { - messages []Message + messages []message.Message } func newTestPluginClient(context.Context, zerolog.Logger, any) (Client, error) { return &testPluginClient{}, nil } +func (c *testPluginClient) GetSpec() any { + return &struct{}{} +} + func (c *testPluginClient) Tables(ctx context.Context) (schema.Tables, error) { return schema.Tables{}, nil } @@ -28,13 +33,13 @@ func (c *testPluginClient) Read(ctx context.Context, table *schema.Table, res ch return nil } -func (c *testPluginClient) Sync(ctx context.Context, options SyncOptions, res chan<- Message) error { +func (c *testPluginClient) Sync(ctx context.Context, options SyncOptions, res chan<- message.Message) error { for _, msg := range c.messages { res <- msg } return nil } -func (c *testPluginClient) Write(ctx context.Context, options WriteOptions, res <-chan Message) error { +func (c *testPluginClient) Write(ctx context.Context, options WriteOptions, res <-chan message.Message) error { for msg := range res { c.messages = append(c.messages, msg) } @@ -60,8 +65,8 @@ func TestPluginSuccess(t *testing.T) { if err := p.WriteAll(ctx, WriteOptions{}, nil); err != nil { t.Fatal(err) } - if err := p.WriteAll(ctx, WriteOptions{}, []Message{ - MessageMigrateTable{}, + if err := p.WriteAll(ctx, WriteOptions{}, []message.Message{ + message.MigrateTable{}, }); err != nil { t.Fatal(err) } diff --git a/plugin/testing_sync.go.backup b/plugin/testing_sync.go similarity index 78% rename from plugin/testing_sync.go.backup rename to plugin/testing_sync.go index ecd136ca00..608b7cd653 100644 --- a/plugin/testing_sync.go.backup +++ b/plugin/testing_sync.go @@ -8,10 +8,11 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/array" + "github.com/cloudquery/plugin-sdk/v4/message" "github.com/cloudquery/plugin-sdk/v4/schema" ) -type Validator func(t *testing.T, plugin *Plugin, resources []arrow.Record) +type Validator func(t *testing.T, plugin *Plugin, resources []message.Message) func TestPluginSync(t *testing.T, plugin *Plugin, spec any, options SyncOptions, opts ...TestPluginOption) { t.Helper() @@ -27,7 +28,7 @@ func TestPluginSync(t *testing.T, plugin *Plugin, spec any, options SyncOptions, t.Parallel() } - resourcesChannel := make(chan arrow.Record) + resourcesChannel := make(chan message.Message) var syncErr error if err := plugin.Init(context.Background(), spec); err != nil { @@ -39,7 +40,7 @@ func TestPluginSync(t *testing.T, plugin *Plugin, spec any, options SyncOptions, syncErr = plugin.Sync(context.Background(), options, resourcesChannel) }() - syncedResources := make([]arrow.Record, 0) + syncedResources := make([]message.Message, 0) for resource := range resourcesChannel { syncedResources = append(syncedResources, resource) } @@ -70,28 +71,32 @@ type testPluginOptions struct { validators []Validator } -func getTableResources(t *testing.T, table *schema.Table, resources []arrow.Record) []arrow.Record { +func getTableResources(t *testing.T, table *schema.Table, messages []message.Message) []arrow.Record { t.Helper() tableResources := make([]arrow.Record, 0) - - for _, resource := range resources { - md := resource.Schema().Metadata() - tableName, ok := md.GetValue(schema.MetadataTableName) - if !ok { - t.Errorf("Expected table name to be set in metadata") - } - if tableName == table.Name { - tableResources = append(tableResources, resource) + for _, msg := range messages { + switch v := msg.(type) { + case *message.Insert: + md := v.Record.Schema().Metadata() + tableName, ok := md.GetValue(schema.MetadataTableName) + if !ok { + t.Errorf("Expected table name to be set in metadata") + } + if tableName == table.Name { + tableResources = append(tableResources, v.Record) + } + default: + t.Errorf("Unexpected message type %T", v) } } return tableResources } -func validateTable(t *testing.T, table *schema.Table, resources []arrow.Record) { +func validateTable(t *testing.T, table *schema.Table, messages []message.Message) { t.Helper() - tableResources := getTableResources(t, table, resources) + tableResources := getTableResources(t, table, messages) if len(tableResources) == 0 { t.Errorf("Expected table %s to be synced but it was not found", table.Name) return @@ -99,7 +104,7 @@ func validateTable(t *testing.T, table *schema.Table, resources []arrow.Record) validateResources(t, table, tableResources) } -func validatePlugin(t *testing.T, plugin *Plugin, resources []arrow.Record) { +func validatePlugin(t *testing.T, plugin *Plugin, resources []message.Message) { t.Helper() tables, err := plugin.Tables(context.Background()) if err != nil { diff --git a/plugin/testing_upsert.go b/plugin/testing_upsert.go index 268d65a45f..a32c7d6497 100644 --- a/plugin/testing_upsert.go +++ b/plugin/testing_upsert.go @@ -8,6 +8,7 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/array" "github.com/apache/arrow/go/v13/arrow/memory" + "github.com/cloudquery/plugin-sdk/v4/message" "github.com/cloudquery/plugin-sdk/v4/schema" ) @@ -19,7 +20,7 @@ func (s *WriterTestSuite) testUpsert(ctx context.Context) error { {Name: "name", Type: arrow.BinaryTypes.String, PrimaryKey: true}, }, } - if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageMigrateTable{ + if err := s.plugin.writeOne(ctx, WriteOptions{}, &message.MigrateTable{ Table: table, }); err != nil { return fmt.Errorf("failed to create table: %w", err) @@ -29,7 +30,7 @@ func (s *WriterTestSuite) testUpsert(ctx context.Context) error { bldr.Field(0).(*array.StringBuilder).Append("foo") record := bldr.NewRecord() - if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageInsert{ + if err := s.plugin.writeOne(ctx, WriteOptions{}, &message.Insert{ Record: record, Upsert: true, }); err != nil { @@ -45,7 +46,7 @@ func (s *WriterTestSuite) testUpsert(ctx context.Context) error { return fmt.Errorf("expected 1 item, got %d", totalItems) } - if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageInsert{ + if err := s.plugin.writeOne(ctx, WriteOptions{}, &message.Insert{ Record: record, Upsert: true, }); err != nil { diff --git a/plugin/testing_write.go b/plugin/testing_write.go index 2dd9c4b074..e3a6eddc39 100644 --- a/plugin/testing_write.go +++ b/plugin/testing_write.go @@ -37,14 +37,14 @@ type SafeMigrations struct { } type PluginTestSuiteTests struct { - // SkipUpsert skips testing with MessageInsert and Upsert=true. + // SkipUpsert skips testing with message.Insert and Upsert=true. // Usually when a destination is not supporting primary keys SkipUpsert bool - // SkipDeleteStale skips testing MessageDelete events. + // SkipDeleteStale skips testing message.Delete events. SkipDeleteStale bool - // SkipAppend skips testing MessageInsert and Upsert=false. + // SkipAppend skips testing message.Insert and Upsert=false. SkipInsert bool // SkipMigrate skips testing migration diff --git a/plugin/testing_write_delete.go b/plugin/testing_write_delete.go index 69e6ec2e92..5ec89b8d93 100644 --- a/plugin/testing_write_delete.go +++ b/plugin/testing_write_delete.go @@ -7,6 +7,7 @@ import ( "github.com/apache/arrow/go/v13/arrow/array" "github.com/apache/arrow/go/v13/arrow/memory" + "github.com/cloudquery/plugin-sdk/v4/message" "github.com/cloudquery/plugin-sdk/v4/schema" ) @@ -20,7 +21,7 @@ func (s *WriterTestSuite) testDeleteStale(ctx context.Context) error { schema.CqSyncTimeColumn, }, } - if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageMigrateTable{ + if err := s.plugin.writeOne(ctx, WriteOptions{}, &message.MigrateTable{ Table: table, }); err != nil { return fmt.Errorf("failed to create table: %w", err) @@ -31,7 +32,7 @@ func (s *WriterTestSuite) testDeleteStale(ctx context.Context) error { bldr.Field(1).(*array.TimestampBuilder).AppendTime(syncTime) record := bldr.NewRecord() - if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageInsert{ + if err := s.plugin.writeOne(ctx, WriteOptions{}, &message.Insert{ Record: record, }); err != nil { return fmt.Errorf("failed to insert record: %w", err) @@ -51,7 +52,7 @@ func (s *WriterTestSuite) testDeleteStale(ctx context.Context) error { bldr.Field(0).(*array.StringBuilder).Append("test") bldr.Field(1).(*array.TimestampBuilder).AppendTime(syncTime.Add(time.Second)) - if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageDeleteStale{ + if err := s.plugin.writeOne(ctx, WriteOptions{}, &message.DeleteStale{ Table: table, SourceName: "test", SyncTime: syncTime, diff --git a/plugin/testing_write_insert.go b/plugin/testing_write_insert.go index f0cc3e8d83..7dc987a94b 100644 --- a/plugin/testing_write_insert.go +++ b/plugin/testing_write_insert.go @@ -8,6 +8,7 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/array" "github.com/apache/arrow/go/v13/arrow/memory" + "github.com/cloudquery/plugin-sdk/v4/message" "github.com/cloudquery/plugin-sdk/v4/schema" ) @@ -27,7 +28,7 @@ func (s *WriterTestSuite) testInsert(ctx context.Context) error { {Name: "name", Type: arrow.BinaryTypes.String}, }, } - if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageMigrateTable{ + if err := s.plugin.writeOne(ctx, WriteOptions{}, &message.MigrateTable{ Table: table, }); err != nil { return fmt.Errorf("failed to create table: %w", err) @@ -37,7 +38,7 @@ func (s *WriterTestSuite) testInsert(ctx context.Context) error { bldr.Field(0).(*array.StringBuilder).Append("foo") record := bldr.NewRecord() - if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageInsert{ + if err := s.plugin.writeOne(ctx, WriteOptions{}, &message.Insert{ Record: record, Upsert: false, }); err != nil { @@ -53,7 +54,7 @@ func (s *WriterTestSuite) testInsert(ctx context.Context) error { return fmt.Errorf("expected 1 item, got %d", totalItems) } - if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageInsert{ + if err := s.plugin.writeOne(ctx, WriteOptions{}, &message.Insert{ Record: record, }); err != nil { return fmt.Errorf("failed to insert record: %w", err) diff --git a/plugin/testing_write_migrate.go b/plugin/testing_write_migrate.go index 697d4cfda7..7418f2b043 100644 --- a/plugin/testing_write_migrate.go +++ b/plugin/testing_write_migrate.go @@ -8,6 +8,7 @@ import ( "time" "github.com/apache/arrow/go/v13/arrow" + "github.com/cloudquery/plugin-sdk/v4/message" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/cloudquery/plugin-sdk/v4/types" "github.com/google/uuid" @@ -20,7 +21,7 @@ func tableUUIDSuffix() string { func (s *WriterTestSuite) migrate(ctx context.Context, target *schema.Table, source *schema.Table, supportsSafeMigrate bool, writeOptionMigrateForce bool) error { if err := s.plugin.writeOne(ctx, WriteOptions{ MigrateForce: writeOptionMigrateForce, - }, &MessageMigrateTable{ + }, &message.MigrateTable{ Table: source, }); err != nil { return fmt.Errorf("failed to create table: %w", err) @@ -37,7 +38,7 @@ func (s *WriterTestSuite) migrate(ctx context.Context, target *schema.Table, sou resource1 := schema.GenTestData(source, opts)[0] - if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageInsert{ + if err := s.plugin.writeOne(ctx, WriteOptions{}, &message.Insert{ Record: resource1, }); err != nil { return fmt.Errorf("failed to insert first record: %w", err) @@ -52,14 +53,14 @@ func (s *WriterTestSuite) migrate(ctx context.Context, target *schema.Table, sou return fmt.Errorf("expected 1 item, got %d", totalItems) } - if err := s.plugin.writeOne(ctx, WriteOptions{MigrateForce: writeOptionMigrateForce}, &MessageMigrateTable{ + if err := s.plugin.writeOne(ctx, WriteOptions{MigrateForce: writeOptionMigrateForce}, &message.MigrateTable{ Table: target, }); err != nil { return fmt.Errorf("failed to create table: %w", err) } resource2 := schema.GenTestData(target, opts)[0] - if err := s.plugin.writeOne(ctx, WriteOptions{}, &MessageInsert{ + if err := s.plugin.writeOne(ctx, WriteOptions{}, &message.Insert{ Record: resource2, }); err != nil { return fmt.Errorf("failed to insert second record: %w", err) diff --git a/scheduler/scheduler.go b/scheduler/scheduler.go index 66f56845d7..9d53abbb15 100644 --- a/scheduler/scheduler.go +++ b/scheduler/scheduler.go @@ -9,10 +9,10 @@ import ( "sync/atomic" "time" - "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/array" "github.com/apache/arrow/go/v13/arrow/memory" "github.com/cloudquery/plugin-sdk/v4/caser" + "github.com/cloudquery/plugin-sdk/v4/message" "github.com/cloudquery/plugin-sdk/v4/scalar" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/getsentry/sentry-go" @@ -83,6 +83,10 @@ func WithSchedulerStrategy(strategy Strategy) Option { } } +type Client interface { + ID() string +} + type Scheduler struct { tables schema.Tables client schema.ClientMeta @@ -119,7 +123,7 @@ func NewScheduler(tables schema.Tables, client schema.ClientMeta, opts ...Option return &s } -func (s *Scheduler) Sync(ctx context.Context, res chan<- arrow.Record) error { +func (s *Scheduler) Sync(ctx context.Context, res chan<- message.Message) error { resources := make(chan *schema.Resource) go func() { defer close(resources) @@ -137,12 +141,12 @@ func (s *Scheduler) Sync(ctx context.Context, res chan<- arrow.Record) error { bldr := array.NewRecordBuilder(memory.DefaultAllocator, resource.Table.ToArrowSchema()) scalar.AppendToRecordBuilder(bldr, vector) rec := bldr.NewRecord() - res <- rec + res <- &message.Insert{Record: rec} } return nil } -func (s *Scheduler) logTablesMetrics(tables schema.Tables, client schema.ClientMeta) { +func (s *Scheduler) logTablesMetrics(tables schema.Tables, client Client) { clientName := client.ID() for _, table := range tables { metrics := s.metrics.TableClient[table.Name][clientName] diff --git a/scheduler/scheduler_test.go b/scheduler/scheduler_test.go index 6eb6f3db01..fa5aa9669f 100644 --- a/scheduler/scheduler_test.go +++ b/scheduler/scheduler_test.go @@ -6,6 +6,7 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/array" + "github.com/cloudquery/plugin-sdk/v4/message" "github.com/cloudquery/plugin-sdk/v4/scalar" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/rs/zerolog" @@ -229,25 +230,31 @@ func testSyncTable(t *testing.T, tc syncTestCase, strategy Strategy, determinist WithDeterministicCQId(deterministicCQID), } sc := NewScheduler(tables, &c, opts...) - records := make(chan arrow.Record, 10) - if err := sc.Sync(ctx, records); err != nil { + msgs := make(chan message.Message, 10) + if err := sc.Sync(ctx, msgs); err != nil { t.Fatal(err) } - close(records) + close(msgs) var i int - for record := range records { + for msg := range msgs { if tc.data == nil { - t.Fatalf("Unexpected resource %v", record) + t.Fatalf("Unexpected message %v", msg) } if i >= len(tc.data) { t.Fatalf("expected %d resources. got %d", len(tc.data), i) } - rec := tc.data[i].ToArrowRecord(record.Schema()) - if !array.RecordEqual(rec, record) { - t.Fatalf("expected at i=%d: %v. got %v", i, tc.data[i], record) + switch v := msg.(type) { + case *message.Insert: + record := v.Record + rec := tc.data[i].ToArrowRecord(record.Schema()) + if !array.RecordEqual(rec, record) { + t.Fatalf("expected at i=%d: %v. got %v", i, tc.data[i], record) + } + i++ + default: + t.Fatalf("expected insert message. got %v", msg) } - i++ } if len(tc.data) != i { t.Fatalf("expected %d resources. got %d", len(tc.data), i) diff --git a/serve/destination_v0_test.go.backup b/serve/destination_v0_test.go.backup index ff0ad377d3..6c2ca95965 100644 --- a/serve/destination_v0_test.go.backup +++ b/serve/destination_v0_test.go.backup @@ -139,7 +139,7 @@ func TestDestination(t *testing.T) { destRecord := serversDestination.CQTypesOneToRecord(memory.DefaultAllocator, destResource.Data, table.ToArrowSchema()) for _, msg := range msgs { totalResources++ - m := msg.(*plugin.MessageInsert) + m := msg.(*message.Insert) if !array.RecordEqual(destRecord, m.Record) { // diff := destination.RecordDiff(destRecord, resource) t.Fatalf("expected %v but got %v", destRecord, m.Record) diff --git a/serve/destination_v1_test.go.backup b/serve/destination_v1_test.go.backup index abc789ff2d..d12aea4db1 100644 --- a/serve/destination_v1_test.go.backup +++ b/serve/destination_v1_test.go.backup @@ -138,7 +138,7 @@ func TestDestinationV1(t *testing.T) { totalResources := 0 for _, msg := range msgs { totalResources++ - m := msg.(*plugin.MessageInsert) + m := msg.(*message.Insert) if !array.RecordEqual(rec, m.Record) { // diff := plugin.RecordDiff(rec, resource) // t.Fatalf("diff at %d: %s", totalResources, diff) diff --git a/writers/batch.go b/writers/batch.go index 34f15c84f0..26661d864f 100644 --- a/writers/batch.go +++ b/writers/batch.go @@ -9,6 +9,7 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/util" "github.com/cloudquery/plugin-sdk/v4/internal/pk" + "github.com/cloudquery/plugin-sdk/v4/message" "github.com/cloudquery/plugin-sdk/v4/plugin" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/rs/zerolog" @@ -16,7 +17,7 @@ import ( ) type Writer interface { - Write(ctx context.Context, writeOptions plugin.WriteOptions, res <-chan plugin.Message) error + Write(ctx context.Context, writeOptions plugin.WriteOptions, res <-chan message.Message) error } const ( @@ -27,9 +28,9 @@ const ( ) type BatchWriterClient interface { - MigrateTables(context.Context, []*plugin.MessageMigrateTable) error - WriteTableBatch(ctx context.Context, name string, upsert bool, msgs []*plugin.MessageInsert) error - DeleteStale(context.Context, []*plugin.MessageDeleteStale) error + MigrateTables(context.Context, []*message.MigrateTable) error + WriteTableBatch(ctx context.Context, name string, upsert bool, msgs []*message.Insert) error + DeleteStale(context.Context, []*message.DeleteStale) error } type BatchWriter struct { @@ -38,8 +39,8 @@ type BatchWriter struct { workers map[string]*worker workersLock *sync.RWMutex workersWaitGroup *sync.WaitGroup - migrateTableMessages []*plugin.MessageMigrateTable - deleteStaleMessages []*plugin.MessageDeleteStale + migrateTableMessages []*message.MigrateTable + deleteStaleMessages []*message.DeleteStale logger zerolog.Logger batchTimeout time.Duration @@ -82,7 +83,7 @@ func WithBatchSizeBytes(size int) Option { type worker struct { count int wg *sync.WaitGroup - ch chan *plugin.MessageInsert + ch chan *message.Insert flush chan chan bool } @@ -101,8 +102,8 @@ func NewBatchWriter(client BatchWriterClient, opts ...Option) (*BatchWriter, err for _, opt := range opts { opt(c) } - c.migrateTableMessages = make([]*plugin.MessageMigrateTable, 0, c.batchSize) - c.deleteStaleMessages = make([]*plugin.MessageDeleteStale, 0, c.batchSize) + c.migrateTableMessages = make([]*message.MigrateTable, 0, c.batchSize) + c.deleteStaleMessages = make([]*message.DeleteStale, 0, c.batchSize) return c, nil } @@ -130,9 +131,9 @@ func (w *BatchWriter) Close(ctx context.Context) error { return nil } -func (w *BatchWriter) worker(ctx context.Context, tableName string, ch <-chan *plugin.MessageInsert, flush <-chan chan bool) { +func (w *BatchWriter) worker(ctx context.Context, tableName string, ch <-chan *message.Insert, flush <-chan chan bool) { sizeBytes := int64(0) - resources := make([]*plugin.MessageInsert, 0) + resources := make([]*message.Insert, 0) upsertBatch := false for { select { @@ -145,7 +146,7 @@ func (w *BatchWriter) worker(ctx context.Context, tableName string, ch <-chan *p } if upsertBatch != r.Upsert { w.flush(ctx, tableName, upsertBatch, resources) - resources = make([]*plugin.MessageInsert, 0) + resources = make([]*message.Insert, 0) sizeBytes = 0 upsertBatch = r.Upsert resources = append(resources, r) @@ -156,19 +157,19 @@ func (w *BatchWriter) worker(ctx context.Context, tableName string, ch <-chan *p } if len(resources) >= w.batchSize || sizeBytes+util.TotalRecordSize(r.Record) >= int64(w.batchSizeBytes) { w.flush(ctx, tableName, upsertBatch, resources) - resources = make([]*plugin.MessageInsert, 0) + resources = make([]*message.Insert, 0) sizeBytes = 0 } case <-time.After(w.batchTimeout): if len(resources) > 0 { w.flush(ctx, tableName, upsertBatch, resources) - resources = make([]*plugin.MessageInsert, 0) + resources = make([]*message.Insert, 0) sizeBytes = 0 } case done := <-flush: if len(resources) > 0 { w.flush(ctx, tableName, upsertBatch, resources) - resources = make([]*plugin.MessageInsert, 0) + resources = make([]*message.Insert, 0) sizeBytes = 0 } done <- true @@ -179,7 +180,7 @@ func (w *BatchWriter) worker(ctx context.Context, tableName string, ch <-chan *p } } -func (w *BatchWriter) flush(ctx context.Context, tableName string, upsertBatch bool, resources []*plugin.MessageInsert) { +func (w *BatchWriter) flush(ctx context.Context, tableName string, upsertBatch bool, resources []*message.Insert) { // resources = w.removeDuplicatesByPK(table, resources) start := time.Now() batchSize := len(resources) @@ -247,8 +248,8 @@ func (w *BatchWriter) flushInsert(ctx context.Context, tableName string) { <-ch } -func (w *BatchWriter) writeAll(ctx context.Context, msgs []plugin.Message) error { - ch := make(chan plugin.Message, len(msgs)) +func (w *BatchWriter) writeAll(ctx context.Context, msgs []message.Message) error { + ch := make(chan message.Message, len(msgs)) for _, msg := range msgs { ch <- msg } @@ -256,10 +257,10 @@ func (w *BatchWriter) writeAll(ctx context.Context, msgs []plugin.Message) error return w.Write(ctx, ch) } -func (w *BatchWriter) Write(ctx context.Context, msgs <-chan plugin.Message) error { +func (w *BatchWriter) Write(ctx context.Context, msgs <-chan message.Message) error { for msg := range msgs { switch m := msg.(type) { - case *plugin.MessageDeleteStale: + case *message.DeleteStale: if len(w.migrateTableMessages) > 0 { if err := w.flushMigrateTables(ctx); err != nil { return err @@ -272,7 +273,7 @@ func (w *BatchWriter) Write(ctx context.Context, msgs <-chan plugin.Message) err return err } } - case *plugin.MessageInsert: + case *message.Insert: if len(w.migrateTableMessages) > 0 { if err := w.flushMigrateTables(ctx); err != nil { return err @@ -286,7 +287,7 @@ func (w *BatchWriter) Write(ctx context.Context, msgs <-chan plugin.Message) err if err := w.startWorker(ctx, m); err != nil { return err } - case *plugin.MessageMigrateTable: + case *message.MigrateTable: w.flushInsert(ctx, m.Table.Name) if len(w.deleteStaleMessages) > 0 { if err := w.flushDeleteStaleTables(ctx); err != nil { @@ -304,7 +305,7 @@ func (w *BatchWriter) Write(ctx context.Context, msgs <-chan plugin.Message) err return nil } -func (w *BatchWriter) startWorker(ctx context.Context, msg *plugin.MessageInsert) error { +func (w *BatchWriter) startWorker(ctx context.Context, msg *message.Insert) error { w.workersLock.RLock() md := msg.Record.Schema().Metadata() tableName, ok := md.GetValue(schema.MetadataTableName) @@ -319,7 +320,7 @@ func (w *BatchWriter) startWorker(ctx context.Context, msg *plugin.MessageInsert return nil } w.workersLock.Lock() - ch := make(chan *plugin.MessageInsert) + ch := make(chan *message.Insert) flush := make(chan chan bool) wr = &worker{ count: 1, diff --git a/writers/batch_test.go b/writers/batch_test.go index deef3bc627..35452ca7ee 100644 --- a/writers/batch_test.go +++ b/writers/batch_test.go @@ -8,26 +8,26 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/array" "github.com/apache/arrow/go/v13/arrow/memory" - "github.com/cloudquery/plugin-sdk/v4/plugin" + "github.com/cloudquery/plugin-sdk/v4/message" "github.com/cloudquery/plugin-sdk/v4/schema" ) type testBatchClient struct { - migrateTables []*plugin.MessageMigrateTable - inserts []*plugin.MessageInsert - deleteStales []*plugin.MessageDeleteStale + migrateTables []*message.MigrateTable + inserts []*message.Insert + deleteStales []*message.DeleteStale } -func (c *testBatchClient) MigrateTables(_ context.Context, msgs []*plugin.MessageMigrateTable) error { +func (c *testBatchClient) MigrateTables(_ context.Context, msgs []*message.MigrateTable) error { c.migrateTables = append(c.migrateTables, msgs...) return nil } -func (c *testBatchClient) WriteTableBatch(_ context.Context, _ string, _ bool, msgs []*plugin.MessageInsert) error { +func (c *testBatchClient) WriteTableBatch(_ context.Context, _ string, _ bool, msgs []*message.Insert) error { c.inserts = append(c.inserts, msgs...) return nil } -func (c *testBatchClient) DeleteStale(_ context.Context, msgs []*plugin.MessageDeleteStale) error { +func (c *testBatchClient) DeleteStale(_ context.Context, msgs []*message.DeleteStale) error { c.deleteStales = append(c.deleteStales, msgs...) return nil } @@ -67,13 +67,13 @@ func TestBatchFlushDifferentMessages(t *testing.T) { bldr := array.NewRecordBuilder(memory.DefaultAllocator, batchTestTables[0].ToArrowSchema()) bldr.Field(0).(*array.Int64Builder).Append(1) record := bldr.NewRecord() - if err := wr.writeAll(ctx, []plugin.Message{&plugin.MessageMigrateTable{Table: batchTestTables[0]}}); err != nil { + if err := wr.writeAll(ctx, []message.Message{&message.MigrateTable{Table: batchTestTables[0]}}); err != nil { t.Fatal(err) } if len(testClient.migrateTables) != 0 { t.Fatalf("expected 0 create table messages, got %d", len(testClient.migrateTables)) } - if err := wr.writeAll(ctx, []plugin.Message{&plugin.MessageInsert{Record: record}}); err != nil { + if err := wr.writeAll(ctx, []message.Message{&message.Insert{Record: record}}); err != nil { t.Fatal(err) } if len(testClient.migrateTables) != 1 { @@ -84,7 +84,7 @@ func TestBatchFlushDifferentMessages(t *testing.T) { t.Fatalf("expected 0 insert messages, got %d", len(testClient.inserts)) } - if err := wr.writeAll(ctx, []plugin.Message{&plugin.MessageMigrateTable{Table: batchTestTables[0]}}); err != nil { + if err := wr.writeAll(ctx, []message.Message{&message.MigrateTable{Table: batchTestTables[0]}}); err != nil { t.Fatal(err) } @@ -103,7 +103,7 @@ func TestBatchSize(t *testing.T) { } table := schema.Table{Name: "table1", Columns: []schema.Column{{Name: "id", Type: arrow.PrimitiveTypes.Int64}}} record := array.NewRecord(table.ToArrowSchema(), nil, 0) - if err := wr.writeAll(ctx, []plugin.Message{&plugin.MessageInsert{ + if err := wr.writeAll(ctx, []message.Message{&message.Insert{ Record: record, }}); err != nil { t.Fatal(err) @@ -113,7 +113,7 @@ func TestBatchSize(t *testing.T) { t.Fatalf("expected 0 create table messages, got %d", len(testClient.inserts)) } - if err := wr.writeAll(ctx, []plugin.Message{&plugin.MessageInsert{ + if err := wr.writeAll(ctx, []message.Message{&message.Insert{ Record: record, }}); err != nil { t.Fatal(err) @@ -136,7 +136,7 @@ func TestBatchTimeout(t *testing.T) { } table := schema.Table{Name: "table1", Columns: []schema.Column{{Name: "id", Type: arrow.PrimitiveTypes.Int64}}} record := array.NewRecord(table.ToArrowSchema(), nil, 0) - if err := wr.writeAll(ctx, []plugin.Message{&plugin.MessageInsert{ + if err := wr.writeAll(ctx, []message.Message{&message.Insert{ Record: record, }}); err != nil { t.Fatal(err) @@ -171,7 +171,7 @@ func TestBatchUpserts(t *testing.T) { } table := schema.Table{Name: "table1", Columns: []schema.Column{{Name: "id", Type: arrow.PrimitiveTypes.Int64}}} record := array.NewRecord(table.ToArrowSchema(), nil, 0) - if err := wr.writeAll(ctx, []plugin.Message{&plugin.MessageInsert{ + if err := wr.writeAll(ctx, []message.Message{&message.Insert{ Record: record, Upsert: true, }}); err != nil { @@ -182,7 +182,7 @@ func TestBatchUpserts(t *testing.T) { t.Fatalf("expected 0 create table messages, got %d", len(testClient.inserts)) } - if err := wr.writeAll(ctx, []plugin.Message{&plugin.MessageInsert{ + if err := wr.writeAll(ctx, []message.Message{&message.Insert{ Record: record, }}); err != nil { t.Fatal(err) diff --git a/writers/mixed_batch.go b/writers/mixed_batch.go index 998a9d7c13..f6704cf488 100644 --- a/writers/mixed_batch.go +++ b/writers/mixed_batch.go @@ -6,6 +6,7 @@ import ( "time" "github.com/apache/arrow/go/v13/arrow/util" + "github.com/cloudquery/plugin-sdk/v4/message" "github.com/cloudquery/plugin-sdk/v4/plugin" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/rs/zerolog" @@ -21,9 +22,9 @@ var allMsgTypes = []int{msgTypeMigrateTable, msgTypeInsert, msgTypeDeleteStale} // MixedBatchClient is a client that will receive batches of messages with a mixture of tables. type MixedBatchClient interface { - MigrateTableBatch(ctx context.Context, messages []*plugin.MessageMigrateTable, options plugin.WriteOptions) error - InsertBatch(ctx context.Context, messages []*plugin.MessageInsert, options plugin.WriteOptions) error - DeleteStaleBatch(ctx context.Context, messages []*plugin.MessageDeleteStale, options plugin.WriteOptions) error + MigrateTableBatch(ctx context.Context, messages []*message.MigrateTable, options plugin.WriteOptions) error + InsertBatch(ctx context.Context, messages []*message.Insert, options plugin.WriteOptions) error + DeleteStaleBatch(ctx context.Context, messages []*message.DeleteStale, options plugin.WriteOptions) error } type MixedBatchWriter struct { @@ -78,33 +79,33 @@ func NewMixedBatchWriter(client MixedBatchClient, opts ...MixedBatchWriterOption return c, nil } -func msgID(msg plugin.Message) int { +func msgID(msg message.Message) int { switch msg.(type) { - case plugin.MessageMigrateTable, *plugin.MessageMigrateTable: + case message.MigrateTable, *message.MigrateTable: return msgTypeMigrateTable - case plugin.MessageInsert, *plugin.MessageInsert: + case message.Insert, *message.Insert: return msgTypeInsert - case plugin.MessageDeleteStale, *plugin.MessageDeleteStale: + case message.DeleteStale, *message.DeleteStale: return msgTypeDeleteStale } panic("unknown message type: " + reflect.TypeOf(msg).Name()) } // Write starts listening for messages on the msgChan channel and writes them to the client in batches. -func (w *MixedBatchWriter) Write(ctx context.Context, options plugin.WriteOptions, msgChan <-chan plugin.Message) error { - migrateTable := &batchManager[*plugin.MessageMigrateTable]{ - batch: make([]*plugin.MessageMigrateTable, 0, w.batchSize), +func (w *MixedBatchWriter) Write(ctx context.Context, options plugin.WriteOptions, msgChan <-chan message.Message) error { + migrateTable := &batchManager[*message.MigrateTable]{ + batch: make([]*message.MigrateTable, 0, w.batchSize), writeFunc: w.client.MigrateTableBatch, writeOptions: options, } insert := &insertBatchManager{ - batch: make([]*plugin.MessageInsert, 0, w.batchSize), + batch: make([]*message.Insert, 0, w.batchSize), writeFunc: w.client.InsertBatch, maxBatchSizeBytes: int64(w.batchSizeBytes), writeOptions: options, } - deleteStale := &batchManager[*plugin.MessageDeleteStale]{ - batch: make([]*plugin.MessageDeleteStale, 0, w.batchSize), + deleteStale := &batchManager[*message.DeleteStale]{ + batch: make([]*message.DeleteStale, 0, w.batchSize), writeFunc: w.client.DeleteStaleBatch, writeOptions: options, } @@ -131,11 +132,11 @@ func (w *MixedBatchWriter) Write(ctx context.Context, options plugin.WriteOption } prevMsgType = msgType switch v := msg.(type) { - case *plugin.MessageMigrateTable: + case *message.MigrateTable: err = migrateTable.append(ctx, v) - case *plugin.MessageInsert: + case *message.Insert: err = insert.append(ctx, v) - case *plugin.MessageDeleteStale: + case *message.DeleteStale: err = deleteStale.append(ctx, v) default: panic("unknown message type") @@ -151,7 +152,7 @@ func (w *MixedBatchWriter) Write(ctx context.Context, options plugin.WriteOption } // generic batch manager for most message types -type batchManager[T plugin.Message] struct { +type batchManager[T message.Message] struct { batch []T writeFunc func(ctx context.Context, messages []T, options plugin.WriteOptions) error writeOptions plugin.WriteOptions @@ -182,14 +183,14 @@ func (m *batchManager[T]) flush(ctx context.Context) error { // special batch manager for insert messages that also keeps track of the total size of the batch type insertBatchManager struct { - batch []*plugin.MessageInsert - writeFunc func(ctx context.Context, messages []*plugin.MessageInsert, writeOptions plugin.WriteOptions) error + batch []*message.Insert + writeFunc func(ctx context.Context, messages []*message.Insert, writeOptions plugin.WriteOptions) error curBatchSizeBytes int64 maxBatchSizeBytes int64 writeOptions plugin.WriteOptions } -func (m *insertBatchManager) append(ctx context.Context, msg *plugin.MessageInsert) error { +func (m *insertBatchManager) append(ctx context.Context, msg *message.Insert) error { if len(m.batch) == cap(m.batch) || m.curBatchSizeBytes+util.TotalRecordSize(msg.Record) > m.maxBatchSizeBytes { if err := m.flush(ctx); err != nil { return err diff --git a/writers/mixed_batch_test.go b/writers/mixed_batch_test.go index 8b952356e2..1ad37cc02f 100644 --- a/writers/mixed_batch_test.go +++ b/writers/mixed_batch_test.go @@ -8,16 +8,17 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/array" "github.com/apache/arrow/go/v13/arrow/memory" + "github.com/cloudquery/plugin-sdk/v4/message" "github.com/cloudquery/plugin-sdk/v4/plugin" "github.com/cloudquery/plugin-sdk/v4/schema" ) type testMixedBatchClient struct { - receivedBatches [][]plugin.Message + receivedBatches [][]message.Message } -func (c *testMixedBatchClient) MigrateTableBatch(ctx context.Context, msgs []*plugin.MessageMigrateTable, options plugin.WriteOptions) error { - m := make([]plugin.Message, len(msgs)) +func (c *testMixedBatchClient) MigrateTableBatch(ctx context.Context, msgs []*message.MigrateTable, options plugin.WriteOptions) error { + m := make([]message.Message, len(msgs)) for i, msg := range msgs { m[i] = msg } @@ -25,8 +26,8 @@ func (c *testMixedBatchClient) MigrateTableBatch(ctx context.Context, msgs []*pl return nil } -func (c *testMixedBatchClient) InsertBatch(ctx context.Context, msgs []*plugin.MessageInsert, options plugin.WriteOptions) error { - m := make([]plugin.Message, len(msgs)) +func (c *testMixedBatchClient) InsertBatch(ctx context.Context, msgs []*message.Insert, options plugin.WriteOptions) error { + m := make([]message.Message, len(msgs)) for i, msg := range msgs { m[i] = msg } @@ -34,8 +35,8 @@ func (c *testMixedBatchClient) InsertBatch(ctx context.Context, msgs []*plugin.M return nil } -func (c *testMixedBatchClient) DeleteStaleBatch(ctx context.Context, msgs []*plugin.MessageDeleteStale, options plugin.WriteOptions) error { - m := make([]plugin.Message, len(msgs)) +func (c *testMixedBatchClient) DeleteStaleBatch(ctx context.Context, msgs []*message.DeleteStale, options plugin.WriteOptions) error { + m := make([]message.Message, len(msgs)) for i, msg := range msgs { m[i] = msg } @@ -58,7 +59,7 @@ func TestMixedBatchWriter(t *testing.T) { }, }, } - msgMigrateTable1 := &plugin.MessageMigrateTable{ + msgMigrateTable1 := &message.MigrateTable{ Table: table1, } @@ -72,7 +73,7 @@ func TestMixedBatchWriter(t *testing.T) { }, }, } - msgMigrateTable2 := &plugin.MessageMigrateTable{ + msgMigrateTable2 := &message.MigrateTable{ Table: table2, } @@ -80,7 +81,7 @@ func TestMixedBatchWriter(t *testing.T) { bldr1 := array.NewRecordBuilder(memory.DefaultAllocator, table1.ToArrowSchema()) bldr1.Field(0).(*array.Int64Builder).Append(1) rec1 := bldr1.NewRecord() - msgInsertTable1 := &plugin.MessageInsert{ + msgInsertTable1 := &message.Insert{ Record: rec1, } @@ -88,18 +89,18 @@ func TestMixedBatchWriter(t *testing.T) { bldr2 := array.NewRecordBuilder(memory.DefaultAllocator, table1.ToArrowSchema()) bldr2.Field(0).(*array.Int64Builder).Append(1) rec2 := bldr2.NewRecord() - msgInsertTable2 := &plugin.MessageInsert{ + msgInsertTable2 := &message.Insert{ Record: rec2, Upsert: false, } // message to delete stale from table1 - msgDeleteStale1 := &plugin.MessageDeleteStale{ + msgDeleteStale1 := &message.DeleteStale{ Table: table1, SourceName: "my-source", SyncTime: time.Now(), } - msgDeleteStale2 := &plugin.MessageDeleteStale{ + msgDeleteStale2 := &message.DeleteStale{ Table: table1, SourceName: "my-source", SyncTime: time.Now(), @@ -107,12 +108,12 @@ func TestMixedBatchWriter(t *testing.T) { testCases := []struct { name string - messages []plugin.Message - wantBatches [][]plugin.Message + messages []message.Message + wantBatches [][]message.Message }{ { name: "create table, insert, delete stale", - messages: []plugin.Message{ + messages: []message.Message{ msgMigrateTable1, msgMigrateTable2, msgInsertTable1, @@ -120,7 +121,7 @@ func TestMixedBatchWriter(t *testing.T) { msgDeleteStale1, msgDeleteStale2, }, - wantBatches: [][]plugin.Message{ + wantBatches: [][]message.Message{ {msgMigrateTable1, msgMigrateTable2}, {msgInsertTable1, msgInsertTable2}, {msgDeleteStale1, msgDeleteStale2}, @@ -128,7 +129,7 @@ func TestMixedBatchWriter(t *testing.T) { }, { name: "interleaved messages", - messages: []plugin.Message{ + messages: []message.Message{ msgMigrateTable1, msgInsertTable1, msgDeleteStale1, @@ -136,7 +137,7 @@ func TestMixedBatchWriter(t *testing.T) { msgInsertTable2, msgDeleteStale2, }, - wantBatches: [][]plugin.Message{ + wantBatches: [][]message.Message{ {msgMigrateTable1}, {msgInsertTable1}, {msgDeleteStale1}, @@ -147,7 +148,7 @@ func TestMixedBatchWriter(t *testing.T) { }, { name: "interleaved messages", - messages: []plugin.Message{ + messages: []message.Message{ msgMigrateTable1, msgMigrateTable2, msgInsertTable1, @@ -155,7 +156,7 @@ func TestMixedBatchWriter(t *testing.T) { msgInsertTable2, msgDeleteStale1, }, - wantBatches: [][]plugin.Message{ + wantBatches: [][]message.Message{ {msgMigrateTable1, msgMigrateTable2}, {msgInsertTable1}, {msgDeleteStale2}, @@ -168,13 +169,13 @@ func TestMixedBatchWriter(t *testing.T) { for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { client := &testMixedBatchClient{ - receivedBatches: make([][]plugin.Message, 0), + receivedBatches: make([][]message.Message, 0), } wr, err := NewMixedBatchWriter(client) if err != nil { t.Fatal(err) } - ch := make(chan plugin.Message, len(tc.messages)) + ch := make(chan message.Message, len(tc.messages)) for _, msg := range tc.messages { ch <- msg } From 3d612fdb4bfe261cc413330b62e502e19e2958b5 Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Fri, 16 Jun 2023 18:33:42 +0300 Subject: [PATCH 087/125] move to []byte --- internal/memdb/memdb.go | 9 ++++++--- internal/servers/destination/v1/destinations.go | 6 +++++- internal/servers/plugin/v3/plugin.go | 7 +------ plugin/plugin.go | 9 ++------- plugin/plugin_source.go | 4 ++-- plugin/plugin_test.go | 4 ++-- plugin/testing_sync.go | 2 +- 7 files changed, 19 insertions(+), 22 deletions(-) diff --git a/internal/memdb/memdb.go b/internal/memdb/memdb.go index 4c273ad3a4..42d2f89c16 100644 --- a/internal/memdb/memdb.go +++ b/internal/memdb/memdb.go @@ -24,6 +24,9 @@ type client struct { type Option func(*client) +type Spec struct { +} + func WithErrOnWrite() Option { return func(c *client) { c.errOnWrite = true @@ -44,12 +47,12 @@ func GetNewClient(options ...Option) plugin.NewClientFunc { for _, opt := range options { opt(c) } - return func(context.Context, zerolog.Logger, any) (plugin.Client, error) { + return func(context.Context, zerolog.Logger, []byte) (plugin.Client, error) { return c, nil } } -func NewMemDBClient(_ context.Context, _ zerolog.Logger, spec any) (plugin.Client, error) { +func NewMemDBClient(_ context.Context, _ zerolog.Logger, spec []byte) (plugin.Client, error) { return &client{ memoryDB: make(map[string][]arrow.Record), tables: make(map[string]*schema.Table), @@ -86,7 +89,7 @@ func (c *client) ID() string { } func (c *client) GetSpec() any { - return &struct{}{} + return &Spec{} } func (c *client) Read(ctx context.Context, table *schema.Table, res chan<- arrow.Record) error { diff --git a/internal/servers/destination/v1/destinations.go b/internal/servers/destination/v1/destinations.go index b3534fc56a..83f02462d9 100644 --- a/internal/servers/destination/v1/destinations.go +++ b/internal/servers/destination/v1/destinations.go @@ -35,7 +35,11 @@ func (s *Server) Configure(ctx context.Context, req *pb.Configure_Request) (*pb. return nil, status.Errorf(codes.InvalidArgument, "failed to unmarshal spec: %v", err) } s.spec = spec - return &pb.Configure_Response{}, s.Plugin.Init(ctx, s.spec.Spec) + pluginSpec, err := json.Marshal(s.spec.Spec) + if err != nil { + return nil, status.Errorf(codes.InvalidArgument, "failed to marshal spec: %v", err) + } + return &pb.Configure_Response{}, s.Plugin.Init(ctx, pluginSpec) } func (s *Server) GetName(context.Context, *pb.GetName_Request) (*pb.GetName_Response, error) { diff --git a/internal/servers/plugin/v3/plugin.go b/internal/servers/plugin/v3/plugin.go index 2a0b3ce429..77c41bea29 100644 --- a/internal/servers/plugin/v3/plugin.go +++ b/internal/servers/plugin/v3/plugin.go @@ -2,7 +2,6 @@ package plugin import ( "context" - "encoding/json" "errors" "fmt" "io" @@ -59,11 +58,7 @@ func (s *Server) GetVersion(context.Context, *pb.GetVersion_Request) (*pb.GetVer } func (s *Server) Init(ctx context.Context, req *pb.Init_Request) (*pb.Init_Response, error) { - pluginSpec := s.Plugin.GetSpec() - if err := json.Unmarshal(req.GetSpec(), &pluginSpec); err != nil { - return nil, status.Errorf(codes.InvalidArgument, "failed to unmarshal plugin spec: %v", err) - } - if err := s.Plugin.Init(ctx, pluginSpec); err != nil { + if err := s.Plugin.Init(ctx, req.Spec); err != nil { return nil, status.Errorf(codes.Internal, "failed to init plugin: %v", err) } return &pb.Init_Response{}, nil diff --git a/plugin/plugin.go b/plugin/plugin.go index d327ee8b87..81fe39dcfd 100644 --- a/plugin/plugin.go +++ b/plugin/plugin.go @@ -13,7 +13,7 @@ import ( var ErrNotImplemented = fmt.Errorf("not implemented") -type NewClientFunc func(context.Context, zerolog.Logger, any) (Client, error) +type NewClientFunc func(context.Context, zerolog.Logger, []byte) (Client, error) type Client interface { SourceClient @@ -120,13 +120,8 @@ func (p *Plugin) Tables(ctx context.Context) (schema.Tables, error) { return tables, nil } -// GetSpec returns an empty struct to be filled with the plugin's configuration. -func (p *Plugin) GetSpec() any { - return p.client.GetSpec() -} - // Init initializes the plugin with the given spec. -func (p *Plugin) Init(ctx context.Context, spec any) error { +func (p *Plugin) Init(ctx context.Context, spec []byte) error { if !p.mu.TryLock() { return fmt.Errorf("plugin already in use") } diff --git a/plugin/plugin_source.go b/plugin/plugin_source.go index c6d6089751..6b015ae7ea 100644 --- a/plugin/plugin_source.go +++ b/plugin/plugin_source.go @@ -46,8 +46,8 @@ type NewSourceClientFunc func(context.Context, zerolog.Logger, any) (SourceClien // NewSourcePlugin returns a new CloudQuery Plugin with the given name, version and implementation. // Source plugins only support read operations. For Read & Write plugin use NewPlugin. func NewSourcePlugin(name string, version string, newClient NewSourceClientFunc, options ...Option) *Plugin { - newClientWrapper := func(ctx context.Context, logger zerolog.Logger, any any) (Client, error) { - sourceClient, err := newClient(ctx, logger, any) + newClientWrapper := func(ctx context.Context, logger zerolog.Logger, spec []byte) (Client, error) { + sourceClient, err := newClient(ctx, logger, spec) if err != nil { return nil, err } diff --git a/plugin/plugin_test.go b/plugin/plugin_test.go index b96d9fc657..d6fdfcba79 100644 --- a/plugin/plugin_test.go +++ b/plugin/plugin_test.go @@ -17,7 +17,7 @@ type testPluginClient struct { messages []message.Message } -func newTestPluginClient(context.Context, zerolog.Logger, any) (Client, error) { +func newTestPluginClient(context.Context, zerolog.Logger, []byte) (Client, error) { return &testPluginClient{}, nil } @@ -52,7 +52,7 @@ func (c *testPluginClient) Close(context.Context) error { func TestPluginSuccess(t *testing.T) { ctx := context.Background() p := NewPlugin("test", "v1.0.0", newTestPluginClient) - if err := p.Init(ctx, &testPluginSpec{}); err != nil { + if err := p.Init(ctx, []byte("")); err != nil { t.Fatal(err) } tables, err := p.Tables(ctx) diff --git a/plugin/testing_sync.go b/plugin/testing_sync.go index 608b7cd653..4bea08f8c1 100644 --- a/plugin/testing_sync.go +++ b/plugin/testing_sync.go @@ -14,7 +14,7 @@ import ( type Validator func(t *testing.T, plugin *Plugin, resources []message.Message) -func TestPluginSync(t *testing.T, plugin *Plugin, spec any, options SyncOptions, opts ...TestPluginOption) { +func TestPluginSync(t *testing.T, plugin *Plugin, spec []byte, options SyncOptions, opts ...TestPluginOption) { t.Helper() o := &testPluginOptions{ From fd33ddeaa9ba614fdff087e13b763e2004140bf9 Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Fri, 16 Jun 2023 21:05:28 +0300 Subject: [PATCH 088/125] fix data race --- writers/batch.go | 62 +++++++++++++++-------------- writers/batch_test.go | 91 ++++++++++++++++++++++++++++++------------- 2 files changed, 97 insertions(+), 56 deletions(-) diff --git a/writers/batch.go b/writers/batch.go index 26661d864f..e4839490d3 100644 --- a/writers/batch.go +++ b/writers/batch.go @@ -13,7 +13,6 @@ import ( "github.com/cloudquery/plugin-sdk/v4/plugin" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/rs/zerolog" - "golang.org/x/sync/semaphore" ) type Writer interface { @@ -22,7 +21,6 @@ type Writer interface { const ( defaultBatchTimeoutSeconds = 20 - defaultMaxWorkers = int64(10000) defaultBatchSize = 10000 defaultBatchSizeBytes = 5 * 1024 * 1024 // 5 MiB ) @@ -35,11 +33,13 @@ type BatchWriterClient interface { type BatchWriter struct { client BatchWriterClient - semaphore *semaphore.Weighted workers map[string]*worker workersLock *sync.RWMutex workersWaitGroup *sync.WaitGroup + + migrateTableLock *sync.Mutex migrateTableMessages []*message.MigrateTable + deleteStaleLock *sync.Mutex deleteStaleMessages []*message.DeleteStale logger zerolog.Logger @@ -62,12 +62,6 @@ func WithBatchTimeout(timeout time.Duration) Option { } } -func WithMaxWorkers(n int64) Option { - return func(p *BatchWriter) { - p.semaphore = semaphore.NewWeighted(n) - } -} - func WithBatchSize(size int) Option { return func(p *BatchWriter) { p.batchSize = size @@ -82,7 +76,6 @@ func WithBatchSizeBytes(size int) Option { type worker struct { count int - wg *sync.WaitGroup ch chan *message.Insert flush chan chan bool } @@ -93,11 +86,12 @@ func NewBatchWriter(client BatchWriterClient, opts ...Option) (*BatchWriter, err workers: make(map[string]*worker), workersLock: &sync.RWMutex{}, workersWaitGroup: &sync.WaitGroup{}, + migrateTableLock: &sync.Mutex{}, + deleteStaleLock: &sync.Mutex{}, logger: zerolog.Nop(), batchTimeout: defaultBatchTimeoutSeconds * time.Second, batchSize: defaultBatchSize, batchSizeBytes: defaultBatchSizeBytes, - semaphore: semaphore.NewWeighted(defaultMaxWorkers), } for _, opt := range opts { opt(c) @@ -219,6 +213,11 @@ func (*BatchWriter) removeDuplicatesByPK(table *schema.Table, resources []arrow. } func (w *BatchWriter) flushMigrateTables(ctx context.Context) error { + w.migrateTableLock.Lock() + defer w.migrateTableLock.Unlock() + if len(w.migrateTableMessages) == 0 { + return nil + } if err := w.client.MigrateTables(ctx, w.migrateTableMessages); err != nil { return err } @@ -227,6 +226,11 @@ func (w *BatchWriter) flushMigrateTables(ctx context.Context) error { } func (w *BatchWriter) flushDeleteStaleTables(ctx context.Context) error { + w.deleteStaleLock.Lock() + defer w.deleteStaleLock.Unlock() + if len(w.deleteStaleMessages) == 0 { + return nil + } if err := w.client.DeleteStale(ctx, w.deleteStaleMessages); err != nil { return err } @@ -261,41 +265,39 @@ func (w *BatchWriter) Write(ctx context.Context, msgs <-chan message.Message) er for msg := range msgs { switch m := msg.(type) { case *message.DeleteStale: - if len(w.migrateTableMessages) > 0 { - if err := w.flushMigrateTables(ctx); err != nil { - return err - } + if err := w.flushMigrateTables(ctx); err != nil { + return err } w.flushInsert(ctx, m.Table.Name) + w.deleteStaleLock.Lock() w.deleteStaleMessages = append(w.deleteStaleMessages, m) - if len(w.deleteStaleMessages) > w.batchSize { + l := len(w.deleteStaleMessages) + w.deleteStaleLock.Unlock() + if l > w.batchSize { if err := w.flushDeleteStaleTables(ctx); err != nil { return err } } case *message.Insert: - if len(w.migrateTableMessages) > 0 { - if err := w.flushMigrateTables(ctx); err != nil { - return err - } + if err := w.flushMigrateTables(ctx); err != nil { + return err } - if len(w.deleteStaleMessages) > 0 { - if err := w.flushDeleteStaleTables(ctx); err != nil { - return err - } + if err := w.flushDeleteStaleTables(ctx); err != nil { + return err } if err := w.startWorker(ctx, m); err != nil { return err } case *message.MigrateTable: w.flushInsert(ctx, m.Table.Name) - if len(w.deleteStaleMessages) > 0 { - if err := w.flushDeleteStaleTables(ctx); err != nil { - return err - } + if err := w.flushDeleteStaleTables(ctx); err != nil { + return err } + w.migrateTableLock.Lock() w.migrateTableMessages = append(w.migrateTableMessages, m) - if len(w.migrateTableMessages) > w.batchSize { + l := len(w.migrateTableMessages) + w.migrateTableLock.Unlock() + if l > w.batchSize { if err := w.flushMigrateTables(ctx); err != nil { return err } @@ -316,7 +318,7 @@ func (w *BatchWriter) startWorker(ctx context.Context, msg *message.Insert) erro wr, ok := w.workers[tableName] w.workersLock.RUnlock() if ok { - w.workers[tableName].ch <- msg + wr.ch <- msg return nil } w.workersLock.Lock() diff --git a/writers/batch_test.go b/writers/batch_test.go index 35452ca7ee..636553d8d5 100644 --- a/writers/batch_test.go +++ b/writers/batch_test.go @@ -2,6 +2,7 @@ package writers import ( "context" + "sync" "testing" "time" @@ -13,21 +14,46 @@ import ( ) type testBatchClient struct { + mutex *sync.Mutex migrateTables []*message.MigrateTable inserts []*message.Insert deleteStales []*message.DeleteStale } +func (c *testBatchClient) MigrateTablesLen() int { + c.mutex.Lock() + defer c.mutex.Unlock() + return len(c.migrateTables) +} + +func (c *testBatchClient) InsertsLen() int { + c.mutex.Lock() + defer c.mutex.Unlock() + return len(c.inserts) +} + +func (c *testBatchClient) DeleteStalesLen() int { + c.mutex.Lock() + defer c.mutex.Unlock() + return len(c.deleteStales) +} + func (c *testBatchClient) MigrateTables(_ context.Context, msgs []*message.MigrateTable) error { + c.mutex.Lock() + defer c.mutex.Unlock() c.migrateTables = append(c.migrateTables, msgs...) return nil } func (c *testBatchClient) WriteTableBatch(_ context.Context, _ string, _ bool, msgs []*message.Insert) error { + c.mutex.Lock() + defer c.mutex.Unlock() c.inserts = append(c.inserts, msgs...) return nil } func (c *testBatchClient) DeleteStale(_ context.Context, msgs []*message.DeleteStale) error { + c.mutex.Lock() + defer c.mutex.Unlock() c.deleteStales = append(c.deleteStales, msgs...) return nil } @@ -58,7 +84,9 @@ var batchTestTables = schema.Tables{ func TestBatchFlushDifferentMessages(t *testing.T) { ctx := context.Background() - testClient := &testBatchClient{} + testClient := &testBatchClient{ + mutex: &sync.Mutex{}, + } wr, err := NewBatchWriter(testClient) if err != nil { t.Fatal(err) @@ -70,33 +98,40 @@ func TestBatchFlushDifferentMessages(t *testing.T) { if err := wr.writeAll(ctx, []message.Message{&message.MigrateTable{Table: batchTestTables[0]}}); err != nil { t.Fatal(err) } - if len(testClient.migrateTables) != 0 { - t.Fatalf("expected 0 create table messages, got %d", len(testClient.migrateTables)) + + if testClient.MigrateTablesLen() != 0 { + t.Fatalf("expected 0 create table messages, got %d", testClient.MigrateTablesLen()) } + if err := wr.writeAll(ctx, []message.Message{&message.Insert{Record: record}}); err != nil { t.Fatal(err) } - if len(testClient.migrateTables) != 1 { - t.Fatalf("expected 1 create table messages, got %d", len(testClient.migrateTables)) + + if testClient.MigrateTablesLen() != 1 { + t.Fatalf("expected 1 migrate table messages, got %d", testClient.MigrateTablesLen()) } - if len(testClient.inserts) != 0 { - t.Fatalf("expected 0 insert messages, got %d", len(testClient.inserts)) + if testClient.InsertsLen() != 0 { + t.Fatalf("expected 0 insert messages, got %d", testClient.InsertsLen()) } + if err := wr.writeAll(ctx, []message.Message{&message.MigrateTable{Table: batchTestTables[0]}}); err != nil { t.Fatal(err) } - if len(testClient.inserts) != 1 { - t.Fatalf("expected 1 insert messages, got %d", len(testClient.inserts)) + if testClient.InsertsLen() != 1 { + t.Fatalf("expected 1 insert messages, got %d", testClient.InsertsLen()) } + } func TestBatchSize(t *testing.T) { ctx := context.Background() - testClient := &testBatchClient{} + testClient := &testBatchClient{ + mutex: &sync.Mutex{}, + } wr, err := NewBatchWriter(testClient, WithBatchSize(2)) if err != nil { t.Fatal(err) @@ -109,8 +144,8 @@ func TestBatchSize(t *testing.T) { t.Fatal(err) } - if len(testClient.inserts) != 0 { - t.Fatalf("expected 0 create table messages, got %d", len(testClient.inserts)) + if testClient.InsertsLen() != 0 { + t.Fatalf("expected 0 insert messages, got %d", testClient.InsertsLen()) } if err := wr.writeAll(ctx, []message.Message{&message.Insert{ @@ -121,15 +156,17 @@ func TestBatchSize(t *testing.T) { // we need to wait for the batch to be flushed time.Sleep(time.Second * 2) - if len(testClient.inserts) != 2 { - t.Fatalf("expected 2 create table messages, got %d", len(testClient.inserts)) + if testClient.InsertsLen() != 2 { + t.Fatalf("expected 2 insert messages, got %d", testClient.InsertsLen()) } } func TestBatchTimeout(t *testing.T) { ctx := context.Background() - testClient := &testBatchClient{} + testClient := &testBatchClient{ + mutex: &sync.Mutex{}, + } wr, err := NewBatchWriter(testClient, WithBatchTimeout(time.Second)) if err != nil { t.Fatal(err) @@ -142,29 +179,31 @@ func TestBatchTimeout(t *testing.T) { t.Fatal(err) } - if len(testClient.inserts) != 0 { - t.Fatalf("expected 0 create table messages, got %d", len(testClient.inserts)) + if testClient.InsertsLen() != 0 { + t.Fatalf("expected 0 insert messages, got %d", testClient.InsertsLen()) } // we need to wait for the batch to be flushed time.Sleep(time.Millisecond * 250) - if len(testClient.inserts) != 0 { - t.Fatalf("expected 0 create table messages, got %d", len(testClient.inserts)) + if testClient.InsertsLen() != 0 { + t.Fatalf("expected 0 insert messages, got %d", testClient.InsertsLen()) } // we need to wait for the batch to be flushed time.Sleep(time.Second * 1) - if len(testClient.inserts) != 1 { - t.Fatalf("expected 1 create table messages, got %d", len(testClient.inserts)) + if testClient.InsertsLen() != 1 { + t.Fatalf("expected 1 insert messages, got %d", testClient.InsertsLen()) } } func TestBatchUpserts(t *testing.T) { ctx := context.Background() - testClient := &testBatchClient{} + testClient := &testBatchClient{ + mutex: &sync.Mutex{}, + } wr, err := NewBatchWriter(testClient) if err != nil { t.Fatal(err) @@ -178,8 +217,8 @@ func TestBatchUpserts(t *testing.T) { t.Fatal(err) } - if len(testClient.inserts) != 0 { - t.Fatalf("expected 0 create table messages, got %d", len(testClient.inserts)) + if testClient.InsertsLen() != 0 { + t.Fatalf("expected 0 insert messages, got %d", testClient.InsertsLen()) } if err := wr.writeAll(ctx, []message.Message{&message.Insert{ @@ -190,7 +229,7 @@ func TestBatchUpserts(t *testing.T) { // we need to wait for the batch to be flushed time.Sleep(time.Second * 2) - if len(testClient.inserts) != 1 { - t.Fatalf("expected 1 create table messages, got %d", len(testClient.inserts)) + if testClient.InsertsLen() != 1 { + t.Fatalf("expected 1 insert messages, got %d", testClient.InsertsLen()) } } From 5c1bf00cddcf20e0535127b64a1da2c8a2b1374b Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Fri, 16 Jun 2023 21:59:45 +0300 Subject: [PATCH 089/125] tests passing --- ...nation_v0_test.go.backup => destination_v0_test.go} | 9 +++++---- ...nation_v1_test.go.backup => destination_v1_test.go} | 1 + serve/plugin.go | 8 ++++---- serve/plugin_test.go | 7 ++++--- writers/batch.go | 10 +++++----- writers/batch_test.go | 3 +-- 6 files changed, 20 insertions(+), 18 deletions(-) rename serve/{destination_v0_test.go.backup => destination_v0_test.go} (95%) rename serve/{destination_v1_test.go.backup => destination_v1_test.go} (98%) diff --git a/serve/destination_v0_test.go.backup b/serve/destination_v0_test.go similarity index 95% rename from serve/destination_v0_test.go.backup rename to serve/destination_v0_test.go index 6c2ca95965..150cbc29fb 100644 --- a/serve/destination_v0_test.go.backup +++ b/serve/destination_v0_test.go @@ -17,6 +17,7 @@ import ( "github.com/cloudquery/plugin-sdk/v4/internal/deprecated" "github.com/cloudquery/plugin-sdk/v4/internal/memdb" serversDestination "github.com/cloudquery/plugin-sdk/v4/internal/servers/destination/v0" + "github.com/cloudquery/plugin-sdk/v4/message" "github.com/cloudquery/plugin-sdk/v4/plugin" "google.golang.org/grpc" "google.golang.org/grpc/credentials/insecure" @@ -41,7 +42,7 @@ func TestDestination(t *testing.T) { }() // https://stackoverflow.com/questions/42102496/testing-a-grpc-service - conn, err := grpc.DialContext(ctx, "bufnet", grpc.WithContextDialer(srv.bufPluginDialer), grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithBlock()) + conn, err := grpc.DialContext(ctx, "bufnet1", grpc.WithContextDialer(srv.bufPluginDialer), grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithBlock()) if err != nil { t.Fatalf("Failed to dial bufnet: %v", err) } @@ -53,10 +54,10 @@ func TestDestination(t *testing.T) { if err != nil { t.Fatal(err) } + if _, err := c.Configure(ctx, &pbBase.Configure_Request{Config: specBytes}); err != nil { t.Fatal(err) } - getNameRes, err := c.GetName(ctx, &pbBase.GetName_Request{}) if err != nil { t.Fatal(err) @@ -117,7 +118,6 @@ func TestDestination(t *testing.T) { }); err != nil { t.Fatal(err) } - if err := writeClient.Send(&pb.Write2_Request{ Resource: destResourceBytes, }); err != nil { @@ -127,6 +127,7 @@ func TestDestination(t *testing.T) { if _, err := writeClient.CloseAndRecv(); err != nil { t.Fatal(err) } + // serversDestination table := serversDestination.TableV2ToV3(tableV2) msgs, err := p.SyncAll(ctx, plugin.SyncOptions{ @@ -148,6 +149,7 @@ func TestDestination(t *testing.T) { if totalResources != 1 { t.Fatalf("expected 1 resource but got %d", totalResources) } + if _, err := c.DeleteStale(ctx, &pb.DeleteStale_Request{ Source: "testSource", Timestamp: timestamppb.New(time.Now().Truncate(time.Microsecond)), @@ -164,7 +166,6 @@ func TestDestination(t *testing.T) { if _, err := c.Close(ctx, &pb.Close_Request{}); err != nil { t.Fatalf("failed to call Close: %v", err) } - cancel() wg.Wait() if serverErr != nil { diff --git a/serve/destination_v1_test.go.backup b/serve/destination_v1_test.go similarity index 98% rename from serve/destination_v1_test.go.backup rename to serve/destination_v1_test.go index d12aea4db1..11e1ab738c 100644 --- a/serve/destination_v1_test.go.backup +++ b/serve/destination_v1_test.go @@ -13,6 +13,7 @@ import ( pb "github.com/cloudquery/plugin-pb-go/pb/destination/v1" "github.com/cloudquery/plugin-pb-go/specs/v0" "github.com/cloudquery/plugin-sdk/v4/internal/memdb" + "github.com/cloudquery/plugin-sdk/v4/message" "github.com/cloudquery/plugin-sdk/v4/plugin" "github.com/cloudquery/plugin-sdk/v4/schema" "google.golang.org/grpc" diff --git a/serve/plugin.go b/serve/plugin.go index 9c55830987..e18a745a37 100644 --- a/serve/plugin.go +++ b/serve/plugin.go @@ -30,7 +30,6 @@ import ( "github.com/rs/zerolog/log" "github.com/spf13/cobra" "github.com/thoas/go-funk" - "golang.org/x/net/netutil" "google.golang.org/grpc" "google.golang.org/grpc/test/bufconn" ) @@ -96,6 +95,7 @@ func (s *PluginServe) Serve(ctx context.Context) error { if err := types.RegisterAllExtensions(); err != nil { return err } + defer types.UnregisterAllExtensions() cmd := s.newCmdPluginRoot() if s.args != nil { cmd.SetArgs(s.args) @@ -132,7 +132,6 @@ func (s *PluginServe) newCmdPluginServe() *cobra.Command { } else { logger = log.Output(zerolog.ConsoleWriter{Out: os.Stdout}).Level(zerologLevel) } - // opts.Plugin.Logger = logger var listener net.Listener if s.testListener { @@ -143,9 +142,10 @@ func (s *PluginServe) newCmdPluginServe() *cobra.Command { return fmt.Errorf("failed to listen %s:%s: %w", network, address, err) } } + defer listener.Close() // source plugins can only accept one connection at a time // unlike destination plugins that can accept multiple connections - limitListener := netutil.LimitListener(listener, 1) + // limitListener := netutil.LimitListener(listener, 1) // See logging pattern https://github.com/grpc-ecosystem/go-grpc-middleware/blob/v2/providers/zerolog/examples_test.go grpcServer := grpc.NewServer( grpc.ChainUnaryInterceptor( @@ -226,7 +226,7 @@ func (s *PluginServe) newCmdPluginServe() *cobra.Command { }() logger.Info().Str("address", listener.Addr().String()).Msg("Source plugin server listening") - if err := grpcServer.Serve(limitListener); err != nil { + if err := grpcServer.Serve(listener); err != nil { return fmt.Errorf("failed to serve: %w", err) } return nil diff --git a/serve/plugin_test.go b/serve/plugin_test.go index e61555a2fc..39e48c808b 100644 --- a/serve/plugin_test.go +++ b/serve/plugin_test.go @@ -21,7 +21,7 @@ import ( func TestPluginServe(t *testing.T) { p := plugin.NewPlugin( - "testPlugin", + "testPluginV3", "v1.0.0", memdb.NewMemDBClient) srv := Plugin(p, WithArgs("serve"), WithTestListener()) @@ -44,14 +44,15 @@ func TestPluginServe(t *testing.T) { if err != nil { t.Fatalf("Failed to dial bufnet: %v", err) } + c := pb.NewPluginClient(conn) getNameRes, err := c.GetName(ctx, &pb.GetName_Request{}) if err != nil { t.Fatal(err) } - if getNameRes.Name != "testPlugin" { - t.Fatalf("expected name to be testPlugin but got %s", getNameRes.Name) + if getNameRes.Name != "testPluginV3" { + t.Fatalf("expected name to be testPluginV3 but got %s", getNameRes.Name) } getVersionResponse, err := c.GetVersion(ctx, &pb.GetVersion_Request{}) diff --git a/writers/batch.go b/writers/batch.go index e4839490d3..67643bc53f 100644 --- a/writers/batch.go +++ b/writers/batch.go @@ -32,12 +32,12 @@ type BatchWriterClient interface { } type BatchWriter struct { - client BatchWriterClient - workers map[string]*worker - workersLock *sync.RWMutex - workersWaitGroup *sync.WaitGroup + client BatchWriterClient + workers map[string]*worker + workersLock *sync.RWMutex + workersWaitGroup *sync.WaitGroup - migrateTableLock *sync.Mutex + migrateTableLock *sync.Mutex migrateTableMessages []*message.MigrateTable deleteStaleLock *sync.Mutex deleteStaleMessages []*message.DeleteStale diff --git a/writers/batch_test.go b/writers/batch_test.go index 636553d8d5..e523f1decb 100644 --- a/writers/batch_test.go +++ b/writers/batch_test.go @@ -14,7 +14,7 @@ import ( ) type testBatchClient struct { - mutex *sync.Mutex + mutex *sync.Mutex migrateTables []*message.MigrateTable inserts []*message.Insert deleteStales []*message.DeleteStale @@ -115,7 +115,6 @@ func TestBatchFlushDifferentMessages(t *testing.T) { t.Fatalf("expected 0 insert messages, got %d", testClient.InsertsLen()) } - if err := wr.writeAll(ctx, []message.Message{&message.MigrateTable{Table: batchTestTables[0]}}); err != nil { t.Fatal(err) } From 29473dfcc0c203fdeea71538ac01d9f67e854505 Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Fri, 16 Jun 2023 22:34:52 +0300 Subject: [PATCH 090/125] Remove GetSpec --- plugin/plugin_destination.go | 1 - plugin/plugin_source.go | 1 - 2 files changed, 2 deletions(-) diff --git a/plugin/plugin_destination.go b/plugin/plugin_destination.go index 2904b2a9a1..68890bbd40 100644 --- a/plugin/plugin_destination.go +++ b/plugin/plugin_destination.go @@ -13,7 +13,6 @@ type WriteOptions struct { } type DestinationClient interface { - GetSpec() any Close(ctx context.Context) error Read(ctx context.Context, table *schema.Table, res chan<- arrow.Record) error Write(ctx context.Context, options WriteOptions, res <-chan message.Message) error diff --git a/plugin/plugin_source.go b/plugin/plugin_source.go index 6b015ae7ea..d6bf744ac7 100644 --- a/plugin/plugin_source.go +++ b/plugin/plugin_source.go @@ -21,7 +21,6 @@ type SyncOptions struct { } type SourceClient interface { - GetSpec() any Close(ctx context.Context) error Tables(ctx context.Context) (schema.Tables, error) Sync(ctx context.Context, options SyncOptions, res chan<- message.Message) error From 8c6e325e555cf034c714fba8fba56f399dda2a08 Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Fri, 16 Jun 2023 22:58:04 +0300 Subject: [PATCH 091/125] fix destination v0 --- internal/servers/destination/v0/destinations.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/internal/servers/destination/v0/destinations.go b/internal/servers/destination/v0/destinations.go index af89f7de26..8e13b14531 100644 --- a/internal/servers/destination/v0/destinations.go +++ b/internal/servers/destination/v0/destinations.go @@ -40,7 +40,11 @@ func (s *Server) Configure(ctx context.Context, req *pbBase.Configure_Request) ( return nil, status.Errorf(codes.InvalidArgument, "failed to unmarshal spec: %v", err) } s.spec = spec - return &pbBase.Configure_Response{}, s.Plugin.Init(ctx, nil) + pluginSpec, err := json.Marshal(s.spec.Spec) + if err != nil { + return nil, status.Errorf(codes.InvalidArgument, "failed to marshal spec: %v", err) + } + return &pbBase.Configure_Response{}, s.Plugin.Init(ctx, pluginSpec) } func (s *Server) GetName(context.Context, *pbBase.GetName_Request) (*pbBase.GetName_Response, error) { From 7ee3599cd5e2268af02f312d7b9d28225f9f3f6b Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Sat, 17 Jun 2023 12:40:31 +0300 Subject: [PATCH 092/125] more wip --- internal/servers/plugin/v3/plugin.go | 5 +- message/message.go | 28 +++++- plugin/plugin_source.go | 1 - plugin/testing_sync.go | 135 --------------------------- scheduler/scheduler.go | 42 +++++++-- scheduler/scheduler_test.go | 4 +- schema/validators.go | 27 ++++++ writers/mixed_batch.go | 6 +- 8 files changed, 96 insertions(+), 152 deletions(-) diff --git a/internal/servers/plugin/v3/plugin.go b/internal/servers/plugin/v3/plugin.go index 77c41bea29..788aaec743 100644 --- a/internal/servers/plugin/v3/plugin.go +++ b/internal/servers/plugin/v3/plugin.go @@ -70,9 +70,8 @@ func (s *Server) Sync(req *pb.Sync_Request, stream pb.Plugin_SyncServer) error { ctx := stream.Context() syncOptions := plugin.SyncOptions{ - Tables: req.Tables, - SkipTables: req.SkipTables, - Concurrency: req.Concurrency, + Tables: req.Tables, + SkipTables: req.SkipTables, } if req.StateBackend != nil { diff --git a/message/message.go b/message/message.go index f30f5a4308..e70ced4dbd 100644 --- a/message/message.go +++ b/message/message.go @@ -24,7 +24,7 @@ type Insert struct { Upsert bool } -func (m Insert) GetTable() *schema.Table { +func (m *Insert) GetTable() *schema.Table { table, err := schema.NewTableFromArrowSchema(m.Record.Schema()) if err != nil { panic(err) @@ -63,6 +63,17 @@ func (messages Messages) InsertItems() int64 { return items } +func (messages Messages) InsertMessage() Inserts { + inserts := []*Insert{} + for _, msg := range messages { + switch m := msg.(type) { + case *Insert: + inserts = append(inserts, m) + } + } + return inserts +} + func (m MigrateTables) Exists(tableName string) bool { for _, table := range m { if table.Table.Name == tableName { @@ -85,3 +96,18 @@ func (m Inserts) Exists(tableName string) bool { } return false } + +func (m Inserts) GetRecordsForTable(table *schema.Table) []arrow.Record { + res := []arrow.Record{} + for _, insert := range m { + md := insert.Record.Schema().Metadata() + tableNameMeta, ok := md.GetValue(schema.MetadataTableName) + if !ok { + continue + } + if tableNameMeta == table.Name { + res = append(res, insert.Record) + } + } + return res +} diff --git a/plugin/plugin_source.go b/plugin/plugin_source.go index d6bf744ac7..118bece1d8 100644 --- a/plugin/plugin_source.go +++ b/plugin/plugin_source.go @@ -15,7 +15,6 @@ import ( type SyncOptions struct { Tables []string SkipTables []string - Concurrency int64 DeterministicCQID bool StateBackend state.Client } diff --git a/plugin/testing_sync.go b/plugin/testing_sync.go index 4bea08f8c1..edf045f055 100644 --- a/plugin/testing_sync.go +++ b/plugin/testing_sync.go @@ -1,148 +1,13 @@ package plugin import ( - "context" "fmt" "strings" - "testing" "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/array" - "github.com/cloudquery/plugin-sdk/v4/message" - "github.com/cloudquery/plugin-sdk/v4/schema" ) -type Validator func(t *testing.T, plugin *Plugin, resources []message.Message) - -func TestPluginSync(t *testing.T, plugin *Plugin, spec []byte, options SyncOptions, opts ...TestPluginOption) { - t.Helper() - - o := &testPluginOptions{ - parallel: true, - validators: []Validator{validatePlugin}, - } - for _, opt := range opts { - opt(o) - } - if o.parallel { - t.Parallel() - } - - resourcesChannel := make(chan message.Message) - var syncErr error - - if err := plugin.Init(context.Background(), spec); err != nil { - t.Fatal(err) - } - - go func() { - defer close(resourcesChannel) - syncErr = plugin.Sync(context.Background(), options, resourcesChannel) - }() - - syncedResources := make([]message.Message, 0) - for resource := range resourcesChannel { - syncedResources = append(syncedResources, resource) - } - if syncErr != nil { - t.Fatal(syncErr) - } - for _, validator := range o.validators { - validator(t, plugin, syncedResources) - } -} - -type TestPluginOption func(*testPluginOptions) - -func WithTestPluginNoParallel() TestPluginOption { - return func(f *testPluginOptions) { - f.parallel = false - } -} - -func WithTestPluginAdditionalValidators(v Validator) TestPluginOption { - return func(f *testPluginOptions) { - f.validators = append(f.validators, v) - } -} - -type testPluginOptions struct { - parallel bool - validators []Validator -} - -func getTableResources(t *testing.T, table *schema.Table, messages []message.Message) []arrow.Record { - t.Helper() - - tableResources := make([]arrow.Record, 0) - for _, msg := range messages { - switch v := msg.(type) { - case *message.Insert: - md := v.Record.Schema().Metadata() - tableName, ok := md.GetValue(schema.MetadataTableName) - if !ok { - t.Errorf("Expected table name to be set in metadata") - } - if tableName == table.Name { - tableResources = append(tableResources, v.Record) - } - default: - t.Errorf("Unexpected message type %T", v) - } - } - - return tableResources -} - -func validateTable(t *testing.T, table *schema.Table, messages []message.Message) { - t.Helper() - tableResources := getTableResources(t, table, messages) - if len(tableResources) == 0 { - t.Errorf("Expected table %s to be synced but it was not found", table.Name) - return - } - validateResources(t, table, tableResources) -} - -func validatePlugin(t *testing.T, plugin *Plugin, resources []message.Message) { - t.Helper() - tables, err := plugin.Tables(context.Background()) - if err != nil { - t.Fatal(err) - } - for _, table := range tables.FlattenTables() { - validateTable(t, table, resources) - } -} - -// Validates that every column has at least one non-nil value. -// Also does some additional validations. -func validateResources(t *testing.T, table *schema.Table, resources []arrow.Record) { - t.Helper() - - // A set of column-names that have values in at least one of the resources. - columnsWithValues := make([]bool, len(table.Columns)) - - for _, resource := range resources { - for _, arr := range resource.Columns() { - for i := 0; i < arr.Len(); i++ { - if arr.IsValid(i) { - columnsWithValues[i] = true - } - } - } - } - - // Make sure every column has at least one value. - for i, hasValue := range columnsWithValues { - col := table.Columns[i] - emptyExpected := col.Name == "_cq_parent_id" && table.Parent == nil - if !hasValue && !emptyExpected && !col.IgnoreInTests { - t.Errorf("table: %s column %s has no values", table.Name, table.Columns[i].Name) - } - } -} - func RecordDiff(l arrow.Record, r arrow.Record) string { var sb strings.Builder if l.NumCols() != r.NumCols() { diff --git a/scheduler/scheduler.go b/scheduler/scheduler.go index 9d53abbb15..d1dc149804 100644 --- a/scheduler/scheduler.go +++ b/scheduler/scheduler.go @@ -25,6 +25,7 @@ const ( minTableConcurrency = 1 minResourceConcurrency = 100 defaultConcurrency = 200000 + defaultMaxDepth = 4 ) type Strategy int @@ -77,6 +78,12 @@ func WithConcurrency(concurrency uint64) Option { } } +func WithMaxDepth(maxDepth uint64) Option { + return func(s *Scheduler) { + s.maxDepth = maxDepth + } +} + func WithSchedulerStrategy(strategy Strategy) Option { return func(s *Scheduler) { s.strategy = strategy @@ -105,25 +112,46 @@ type Scheduler struct { concurrency uint64 } -func NewScheduler(tables schema.Tables, client schema.ClientMeta, opts ...Option) *Scheduler { +func NewScheduler(client schema.ClientMeta, opts ...Option) *Scheduler { s := Scheduler{ - tables: tables, client: client, metrics: &Metrics{TableClient: make(map[string]map[string]*TableClientMetrics)}, caser: caser.New(), concurrency: defaultConcurrency, - maxDepth: maxDepth(tables), + maxDepth: defaultMaxDepth, } for _, opt := range opts { opt(&s) } - if s.maxDepth > 3 { - panic(fmt.Errorf("max depth of %d is not supported for scheduler", s.maxDepth)) - } return &s } -func (s *Scheduler) Sync(ctx context.Context, res chan<- message.Message) error { +// SyncAll is mostly used for testing as it will sync all tables and can run out of memory +// in the real world. Should use Sync for production. +func (s *Scheduler) SyncAll(ctx context.Context, tables schema.Tables) (message.Messages, error) { + res := make(chan message.Message) + go func() { + defer close(res) + s.Sync(ctx, tables, res) + }() + var messages []message.Message + for msg := range res { + messages = append(messages, msg) + } + return messages, nil +} + +func (s *Scheduler) Sync(ctx context.Context, tables schema.Tables, res chan<- message.Message) error { + + if len(tables) == 0 { + return nil + } + + if maxDepth(tables) > s.maxDepth { + return fmt.Errorf("max depth exceeded, max depth is %d", s.maxDepth) + } + s.tables = tables + resources := make(chan *schema.Resource) go func() { defer close(resources) diff --git a/scheduler/scheduler_test.go b/scheduler/scheduler_test.go index fa5aa9669f..ee6e55b801 100644 --- a/scheduler/scheduler_test.go +++ b/scheduler/scheduler_test.go @@ -229,9 +229,9 @@ func testSyncTable(t *testing.T, tc syncTestCase, strategy Strategy, determinist WithSchedulerStrategy(strategy), WithDeterministicCQId(deterministicCQID), } - sc := NewScheduler(tables, &c, opts...) + sc := NewScheduler(&c, opts...) msgs := make(chan message.Message, 10) - if err := sc.Sync(ctx, msgs); err != nil { + if err := sc.Sync(ctx, tables, msgs); err != nil { t.Fatal(err) } close(msgs) diff --git a/schema/validators.go b/schema/validators.go index b42f59e223..6116e861a1 100644 --- a/schema/validators.go +++ b/schema/validators.go @@ -3,6 +3,8 @@ package schema import ( "errors" "fmt" + + "github.com/apache/arrow/go/v13/arrow" ) type TableValidator interface { @@ -53,3 +55,28 @@ func validateTableAttributesNameLength(t *Table) error { func (LengthTableValidator) Validate(t *Table) error { return validateTableAttributesNameLength(t) } + +func FindEmptyColumns(table *Table, records []arrow.Record) []string { + columnsWithValues := make([]bool, len(table.Columns)) + emptyColumns := make([]string, 0) + + for _, resource := range records { + for colIndex, arr := range resource.Columns() { + for i := 0; i < arr.Len(); i++ { + if arr.IsValid(i) { + columnsWithValues[colIndex] = true + } + } + } + } + + // Make sure every column has at least one value. + for i, hasValue := range columnsWithValues { + col := table.Columns[i] + emptyExpected := col.Name == "_cq_parent_id" && table.Parent == nil + if !hasValue && !emptyExpected && !col.IgnoreInTests { + emptyColumns = append(emptyColumns, col.Name) + } + } + return emptyColumns +} diff --git a/writers/mixed_batch.go b/writers/mixed_batch.go index f6704cf488..1ae699a654 100644 --- a/writers/mixed_batch.go +++ b/writers/mixed_batch.go @@ -81,11 +81,11 @@ func NewMixedBatchWriter(client MixedBatchClient, opts ...MixedBatchWriterOption func msgID(msg message.Message) int { switch msg.(type) { - case message.MigrateTable, *message.MigrateTable: + case *message.MigrateTable: return msgTypeMigrateTable - case message.Insert, *message.Insert: + case *message.Insert: return msgTypeInsert - case message.DeleteStale, *message.DeleteStale: + case *message.DeleteStale: return msgTypeDeleteStale } panic("unknown message type: " + reflect.TypeOf(msg).Name()) From 54c8eaa125475d8f87c138670dca32a5f47cae75 Mon Sep 17 00:00:00 2001 From: Kemal Hadimli Date: Tue, 20 Jun 2023 20:17:35 +0100 Subject: [PATCH 093/125] discovery versions: int32 instead of uint64 as is such in the proto --- internal/servers/discovery/v1/discovery.go | 2 +- serve/plugin.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/servers/discovery/v1/discovery.go b/internal/servers/discovery/v1/discovery.go index 47c0197ec7..896e8a9cea 100644 --- a/internal/servers/discovery/v1/discovery.go +++ b/internal/servers/discovery/v1/discovery.go @@ -8,7 +8,7 @@ import ( type Server struct { pb.UnimplementedDiscoveryServer - Versions []uint64 + Versions []int32 } func (s *Server) GetVersions(context.Context, *pb.GetVersions_Request) (*pb.GetVersions_Response, error) { diff --git a/serve/plugin.go b/serve/plugin.go index e18a745a37..94466733db 100644 --- a/serve/plugin.go +++ b/serve/plugin.go @@ -177,7 +177,7 @@ func (s *PluginServe) newCmdPluginServe() *cobra.Command { Versions: []string{"v0", "v1", "v2", "v3"}, }) pbdiscoveryv1.RegisterDiscoveryServer(grpcServer, &discoveryServerV1.Server{ - Versions: []uint64{0, 1, 2, 3}, + Versions: []int32{0, 1, 2, 3}, }) version := s.plugin.Version() From cb2955d46d342097119a6688febf3ac6a45b1cbd Mon Sep 17 00:00:00 2001 From: Herman Schaaf Date: Wed, 21 Jun 2023 17:20:37 +0100 Subject: [PATCH 094/125] Add test for NewRecordFromBytes --- schema/arrow_test.go | 59 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/schema/arrow_test.go b/schema/arrow_test.go index 377cc5718f..bfe898bc19 100644 --- a/schema/arrow_test.go +++ b/schema/arrow_test.go @@ -1,9 +1,13 @@ package schema import ( + "fmt" + "strings" "testing" "github.com/apache/arrow/go/v13/arrow" + "github.com/apache/arrow/go/v13/arrow/array" + "github.com/apache/arrow/go/v13/arrow/memory" ) func TestSchemaEncode(t *testing.T) { @@ -42,3 +46,58 @@ func TestSchemaEncode(t *testing.T) { } } } + +func TestRecordToBytesAndNewRecordFromBytes(t *testing.T) { + md := arrow.NewMetadata([]string{"key"}, []string{"value"}) + schema := arrow.NewSchema( + []arrow.Field{ + {Name: "id", Type: arrow.PrimitiveTypes.Int64}, + {Name: "name", Type: arrow.BinaryTypes.String}, + }, + &md, + ) + bldr := array.NewRecordBuilder(memory.DefaultAllocator, schema) + defer bldr.Release() + bldr.Field(0).AppendValueFromString("1") + bldr.Field(1).AppendValueFromString("foo") + record := bldr.NewRecord() + b, err := RecordToBytes(record) + if err != nil { + t.Fatal(err) + } + decodedRecord, err := NewRecordFromBytes(b) + if err != nil { + t.Fatal(err) + } + numRows := record.NumRows() + if numRows != 1 { + t.Fatalf("expected 1 row, got %d", numRows) + } + if diff := RecordDiff(record, decodedRecord); diff != "" { + t.Fatalf("record differs from expected after NewRecordFromBytes: %v", diff) + } +} + +func RecordDiff(l arrow.Record, r arrow.Record) string { + var sb strings.Builder + if l.NumCols() != r.NumCols() { + return fmt.Sprintf("different number of columns: %d vs %d", l.NumCols(), r.NumCols()) + } + if l.NumRows() != r.NumRows() { + return fmt.Sprintf("different number of rows: %d vs %d", l.NumRows(), r.NumRows()) + } + for i := 0; i < int(l.NumCols()); i++ { + edits, err := array.Diff(l.Column(i), r.Column(i)) + if err != nil { + panic(fmt.Sprintf("left: %v, right: %v, error: %v", l.Column(i).DataType(), r.Column(i).DataType(), err)) + } + diff := edits.UnifiedDiff(l.Column(i), r.Column(i)) + if diff != "" { + sb.WriteString(l.Schema().Field(i).Name) + sb.WriteString(": ") + sb.WriteString(diff) + sb.WriteString("\n") + } + } + return sb.String() +} From 49b01e6cf0d1939ad7eb3907cd7d7145a9af23f7 Mon Sep 17 00:00:00 2001 From: Herman Schaaf Date: Thu, 22 Jun 2023 14:44:08 +0100 Subject: [PATCH 095/125] Add better migration support --- go.mod | 1 + .../servers/destination/v0/destinations.go | 2 +- .../servers/destination/v1/destinations.go | 2 +- internal/servers/discovery/v1/discovery.go | 6 ++++- internal/servers/plugin/v3/plugin.go | 11 +++++++--- scheduler/scheduler.go | 8 ++++++- schema/arrow.go | 13 +++++++++-- schema/arrow_test.go | 22 +++++++++++++++++++ schema/table.go | 2 +- serve/destination_v0_test.go | 2 +- serve/destination_v1_test.go | 2 +- 11 files changed, 59 insertions(+), 12 deletions(-) diff --git a/go.mod b/go.mod index 420f12c1ca..1820a059db 100644 --- a/go.mod +++ b/go.mod @@ -31,6 +31,7 @@ replace github.com/apache/arrow/go/v13 => github.com/cloudquery/arrow/go/v13 v13 replace github.com/apache/arrow/go/v13 => github.com/cloudquery/arrow/go/v13 v13.0.0-20230610001216-0f7bd3beda2c >>>>>>> 7e5547e (more wip) +replace github.com/cloudquery/plugin-pb-go => ../plugin-pb-go replace github.com/cloudquery/plugin-pb-go => ../plugin-pb-go require ( diff --git a/internal/servers/destination/v0/destinations.go b/internal/servers/destination/v0/destinations.go index 8e13b14531..8b97defa3a 100644 --- a/internal/servers/destination/v0/destinations.go +++ b/internal/servers/destination/v0/destinations.go @@ -10,7 +10,7 @@ import ( "github.com/apache/arrow/go/v13/arrow/memory" pbBase "github.com/cloudquery/plugin-pb-go/pb/base/v0" pb "github.com/cloudquery/plugin-pb-go/pb/destination/v0" - "github.com/cloudquery/plugin-pb-go/specs/v0" + "github.com/cloudquery/plugin-pb-go/specs" schemav2 "github.com/cloudquery/plugin-sdk/v2/schema" "github.com/cloudquery/plugin-sdk/v4/message" "github.com/cloudquery/plugin-sdk/v4/plugin" diff --git a/internal/servers/destination/v1/destinations.go b/internal/servers/destination/v1/destinations.go index 83f02462d9..99c5a653c5 100644 --- a/internal/servers/destination/v1/destinations.go +++ b/internal/servers/destination/v1/destinations.go @@ -11,7 +11,7 @@ import ( "github.com/apache/arrow/go/v13/arrow/ipc" "github.com/apache/arrow/go/v13/arrow/memory" pb "github.com/cloudquery/plugin-pb-go/pb/destination/v1" - "github.com/cloudquery/plugin-pb-go/specs/v0" + "github.com/cloudquery/plugin-pb-go/specs" "github.com/cloudquery/plugin-sdk/v4/message" "github.com/cloudquery/plugin-sdk/v4/plugin" "github.com/cloudquery/plugin-sdk/v4/schema" diff --git a/internal/servers/discovery/v1/discovery.go b/internal/servers/discovery/v1/discovery.go index 896e8a9cea..fedb964338 100644 --- a/internal/servers/discovery/v1/discovery.go +++ b/internal/servers/discovery/v1/discovery.go @@ -12,5 +12,9 @@ type Server struct { } func (s *Server) GetVersions(context.Context, *pb.GetVersions_Request) (*pb.GetVersions_Response, error) { - return &pb.GetVersions_Response{Versions: s.Versions}, nil + v := make([]int32, len(s.Versions)) + for i := range s.Versions { + v[i] = int32(s.Versions[i]) + } + return &pb.GetVersions_Response{Versions: v}, nil } diff --git a/internal/servers/plugin/v3/plugin.go b/internal/servers/plugin/v3/plugin.go index 788aaec743..e57d984a0e 100644 --- a/internal/servers/plugin/v3/plugin.go +++ b/internal/servers/plugin/v3/plugin.go @@ -82,7 +82,7 @@ func (s *Server) Sync(req *pb.Sync_Request, stream pb.Plugin_SyncServer) error { if s.NoSentry { opts = append(opts, managedplugin.WithNoSentry()) } - statePlugin, err := managedplugin.NewClient(ctx, managedplugin.Config{ + statePlugin, err := managedplugin.NewClient(ctx, managedplugin.PluginDestination, managedplugin.Config{ Path: req.StateBackend.Path, Registry: managedplugin.Registry(req.StateBackend.Registry), Version: req.StateBackend.Version, @@ -109,12 +109,17 @@ func (s *Server) Sync(req *pb.Sync_Request, stream pb.Plugin_SyncServer) error { for msg := range msgs { switch m := msg.(type) { case *message.MigrateTable: - m.Table.ToArrowSchema() + tableSchema := m.Table.ToArrowSchema() + schemaBytes, err := schema.ToBytes(tableSchema) + if err != nil { + return status.Errorf(codes.Internal, "failed to encode table schema: %v", err) + } pbMsg.Message = &pb.Sync_Response_MigrateTable{ MigrateTable: &pb.MessageMigrateTable{ - Table: nil, + Table: schemaBytes, }, } + case *message.Insert: recordBytes, err := schema.RecordToBytes(m.Record) if err != nil { diff --git a/scheduler/scheduler.go b/scheduler/scheduler.go index d1dc149804..d3f767e997 100644 --- a/scheduler/scheduler.go +++ b/scheduler/scheduler.go @@ -142,7 +142,6 @@ func (s *Scheduler) SyncAll(ctx context.Context, tables schema.Tables) (message. } func (s *Scheduler) Sync(ctx context.Context, tables schema.Tables, res chan<- message.Message) error { - if len(tables) == 0 { return nil } @@ -152,6 +151,13 @@ func (s *Scheduler) Sync(ctx context.Context, tables schema.Tables, res chan<- m } s.tables = tables + // send migrate messages first + for _, table := range tables { + res <- &message.MigrateTable{ + Table: table, + } + } + resources := make(chan *schema.Resource) go func() { defer close(resources) diff --git a/schema/arrow.go b/schema/arrow.go index 4baa2a4b86..d3d5e9639e 100644 --- a/schema/arrow.go +++ b/schema/arrow.go @@ -52,6 +52,15 @@ func (s Schemas) Encode() ([][]byte, error) { return ret, nil } +func ToBytes(schema *arrow.Schema) ([]byte, error) { + var buf bytes.Buffer + wr := ipc.NewWriter(&buf, ipc.WithSchema(schema)) + if err := wr.Close(); err != nil { + return nil, err + } + return buf.Bytes(), nil +} + func RecordToBytes(record arrow.Record) ([]byte, error) { var buf bytes.Buffer wr := ipc.NewWriter(&buf, ipc.WithSchema(record.Schema())) @@ -77,7 +86,7 @@ func NewRecordFromBytes(b []byte) (arrow.Record, error) { return nil, nil } -func NewSchemaFromBytes(b []byte) (*arrow.Schema, error) { +func NewFromBytes(b []byte) (*arrow.Schema, error) { rdr, err := ipc.NewReader(bytes.NewReader(b)) if err != nil { return nil, err @@ -89,7 +98,7 @@ func NewSchemasFromBytes(b [][]byte) (Schemas, error) { var err error ret := make([]*arrow.Schema, len(b)) for i, buf := range b { - ret[i], err = NewSchemaFromBytes(buf) + ret[i], err = NewFromBytes(buf) if err != nil { return nil, err } diff --git a/schema/arrow_test.go b/schema/arrow_test.go index bfe898bc19..184161f6dd 100644 --- a/schema/arrow_test.go +++ b/schema/arrow_test.go @@ -78,6 +78,28 @@ func TestRecordToBytesAndNewRecordFromBytes(t *testing.T) { } } +func TestSchemaToBytesAndNewSchemaFromBytes(t *testing.T) { + md := arrow.NewMetadata([]string{"key"}, []string{"value"}) + schema := arrow.NewSchema( + []arrow.Field{ + {Name: "id", Type: arrow.PrimitiveTypes.Int64}, + {Name: "name", Type: arrow.BinaryTypes.String}, + }, + &md, + ) + b, err := ToBytes(schema) + if err != nil { + t.Fatal(err) + } + decodedSchema, err := NewFromBytes(b) + if err != nil { + t.Fatal(err) + } + if !schema.Equal(decodedSchema) { + t.Fatalf("schema differs from expected after NewSchemaFromBytes. \nBefore: %v,\nAfter: %v", schema, decodedSchema) + } +} + func RecordDiff(l arrow.Record, r arrow.Record) string { var sb strings.Builder if l.NumCols() != r.NumCols() { diff --git a/schema/table.go b/schema/table.go index 76a7384650..b36c02b6a7 100644 --- a/schema/table.go +++ b/schema/table.go @@ -109,7 +109,7 @@ func NewTablesFromArrowSchemas(schemas []*arrow.Schema) (Tables, error) { } func NewTableFromBytes(b []byte) (*Table, error) { - sc, err := NewSchemaFromBytes(b) + sc, err := NewFromBytes(b) if err != nil { return nil, err } diff --git a/serve/destination_v0_test.go b/serve/destination_v0_test.go index 150cbc29fb..96b7565c82 100644 --- a/serve/destination_v0_test.go +++ b/serve/destination_v0_test.go @@ -11,7 +11,7 @@ import ( "github.com/apache/arrow/go/v13/arrow/memory" pbBase "github.com/cloudquery/plugin-pb-go/pb/base/v0" pb "github.com/cloudquery/plugin-pb-go/pb/destination/v0" - "github.com/cloudquery/plugin-pb-go/specs/v0" + "github.com/cloudquery/plugin-pb-go/specs" schemav2 "github.com/cloudquery/plugin-sdk/v2/schema" "github.com/cloudquery/plugin-sdk/v2/testdata" "github.com/cloudquery/plugin-sdk/v4/internal/deprecated" diff --git a/serve/destination_v1_test.go b/serve/destination_v1_test.go index 11e1ab738c..cafb1f7cdf 100644 --- a/serve/destination_v1_test.go +++ b/serve/destination_v1_test.go @@ -11,7 +11,7 @@ import ( "github.com/apache/arrow/go/v13/arrow/array" "github.com/apache/arrow/go/v13/arrow/ipc" pb "github.com/cloudquery/plugin-pb-go/pb/destination/v1" - "github.com/cloudquery/plugin-pb-go/specs/v0" + "github.com/cloudquery/plugin-pb-go/specs" "github.com/cloudquery/plugin-sdk/v4/internal/memdb" "github.com/cloudquery/plugin-sdk/v4/message" "github.com/cloudquery/plugin-sdk/v4/plugin" From c6440a5cea7475aea12b0b6b170bbfb0e3f198f1 Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Thu, 22 Jun 2023 11:40:44 +0300 Subject: [PATCH 096/125] commit fixes --- go.mod | 6 +----- go.sum | 6 ++++++ 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/go.mod b/go.mod index 1820a059db..a6f50265b9 100644 --- a/go.mod +++ b/go.mod @@ -18,18 +18,13 @@ require ( github.com/stretchr/testify v1.8.4 github.com/thoas/go-funk v0.9.3 golang.org/x/exp v0.0.0-20230522175609-2e198f4a06a1 - golang.org/x/net v0.9.0 golang.org/x/sync v0.1.0 golang.org/x/text v0.9.0 google.golang.org/grpc v1.55.0 google.golang.org/protobuf v1.30.0 ) -<<<<<<< HEAD -replace github.com/apache/arrow/go/v13 => github.com/cloudquery/arrow/go/v13 v13.0.0-20230623001532-8366a2241e66 -======= replace github.com/apache/arrow/go/v13 => github.com/cloudquery/arrow/go/v13 v13.0.0-20230610001216-0f7bd3beda2c ->>>>>>> 7e5547e (more wip) replace github.com/cloudquery/plugin-pb-go => ../plugin-pb-go replace github.com/cloudquery/plugin-pb-go => ../plugin-pb-go @@ -59,6 +54,7 @@ require ( github.com/spf13/pflag v1.0.5 // indirect github.com/zeebo/xxh3 v1.0.2 // indirect golang.org/x/mod v0.8.0 // indirect + golang.org/x/net v0.9.0 // indirect golang.org/x/sys v0.7.0 // indirect golang.org/x/term v0.7.0 // indirect golang.org/x/tools v0.6.0 // indirect diff --git a/go.sum b/go.sum index 6d7acabc3e..7eb5068cd3 100644 --- a/go.sum +++ b/go.sum @@ -214,6 +214,12 @@ github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81P github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +<<<<<<< HEAD +======= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +>>>>>>> c0b7ea6 (commit fixes) github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/thoas/go-funk v0.9.3 h1:7+nAEx3kn5ZJcnDm2Bh23N2yOtweO14bi//dvRtgLpw= From a8b629e5a84b2070be0380e95b071d8d0e70b8f7 Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Thu, 22 Jun 2023 22:54:07 +0300 Subject: [PATCH 097/125] migrate flattend tables --- scheduler/scheduler.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scheduler/scheduler.go b/scheduler/scheduler.go index d3f767e997..be5a7a1e71 100644 --- a/scheduler/scheduler.go +++ b/scheduler/scheduler.go @@ -152,7 +152,7 @@ func (s *Scheduler) Sync(ctx context.Context, tables schema.Tables, res chan<- m s.tables = tables // send migrate messages first - for _, table := range tables { + for _, table := range tables.FlattenTables() { res <- &message.MigrateTable{ Table: table, } From 10ce6dbc153188e930e7c4b05381ab660842b888 Mon Sep 17 00:00:00 2001 From: Herman Schaaf Date: Fri, 23 Jun 2023 10:59:13 +0100 Subject: [PATCH 098/125] Remove upserts --- internal/servers/plugin/v3/plugin.go | 2 -- internal/servers/plugin/v3/state.go | 1 - 2 files changed, 3 deletions(-) diff --git a/internal/servers/plugin/v3/plugin.go b/internal/servers/plugin/v3/plugin.go index e57d984a0e..bc902f2272 100644 --- a/internal/servers/plugin/v3/plugin.go +++ b/internal/servers/plugin/v3/plugin.go @@ -128,7 +128,6 @@ func (s *Server) Sync(req *pb.Sync_Request, stream pb.Plugin_SyncServer) error { pbMsg.Message = &pb.Sync_Response_Insert{ Insert: &pb.MessageInsert{ Record: recordBytes, - Upsert: m.Upsert, }, } case *message.DeleteStale: @@ -217,7 +216,6 @@ func (s *Server) Write(msg pb.Plugin_WriteServer) error { } pluginMessage = &message.Insert{ Record: record, - Upsert: pbMsg.Insert.Upsert, } case *pb.Write_Request_Delete: table, err := schema.NewTableFromBytes(pbMsg.Delete.Table) diff --git a/internal/servers/plugin/v3/state.go b/internal/servers/plugin/v3/state.go index f7a9015433..146e646248 100644 --- a/internal/servers/plugin/v3/state.go +++ b/internal/servers/plugin/v3/state.go @@ -154,7 +154,6 @@ func (c *ClientV3) flush(ctx context.Context) error { Message: &pbPlugin.Write_Request_Insert{ Insert: &pbPlugin.MessageInsert{ Record: buf.Bytes(), - Upsert: true, }, }, }); err != nil { From e6a9949ecfd1e901a1a9b945458f3d31a42b54bd Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Fri, 23 Jun 2023 13:17:20 +0300 Subject: [PATCH 099/125] fix tests --- go.mod | 7 ++----- go.sum | 2 ++ scheduler/scheduler_test.go | 10 +++------- 3 files changed, 7 insertions(+), 12 deletions(-) diff --git a/go.mod b/go.mod index a6f50265b9..f6d8df335b 100644 --- a/go.mod +++ b/go.mod @@ -3,9 +3,9 @@ module github.com/cloudquery/plugin-sdk/v4 go 1.19 require ( - github.com/apache/arrow/go/v13 v13.0.0-20230601214540-018e7d3f9c4b + github.com/apache/arrow/go/v13 v13.0.0-20230622042343-ec413b7763fe github.com/bradleyjkemp/cupaloy/v2 v2.8.0 - github.com/cloudquery/plugin-pb-go v1.2.0 + github.com/cloudquery/plugin-pb-go v1.2.1 github.com/cloudquery/plugin-sdk/v2 v2.7.0 github.com/getsentry/sentry-go v0.20.0 github.com/goccy/go-json v0.10.0 @@ -26,9 +26,6 @@ require ( replace github.com/apache/arrow/go/v13 => github.com/cloudquery/arrow/go/v13 v13.0.0-20230610001216-0f7bd3beda2c -replace github.com/cloudquery/plugin-pb-go => ../plugin-pb-go -replace github.com/cloudquery/plugin-pb-go => ../plugin-pb-go - require ( github.com/andybalholm/brotli v1.0.5 // indirect github.com/apache/thrift v0.16.0 // indirect diff --git a/go.sum b/go.sum index 7eb5068cd3..f3e63c76e2 100644 --- a/go.sum +++ b/go.sum @@ -49,6 +49,8 @@ github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMn github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/cloudquery/arrow/go/v13 v13.0.0-20230610001216-0f7bd3beda2c h1:nQSB4v0QxCW5XDLvVBcaNrsJ+J/esMBoFYjymllxM1E= github.com/cloudquery/arrow/go/v13 v13.0.0-20230610001216-0f7bd3beda2c/go.mod h1:W69eByFNO0ZR30q1/7Sr9d83zcVZmF2MiP3fFYAWJOc= +github.com/cloudquery/plugin-pb-go v1.2.1 h1:Ewsg70dkB/f+hzeqKNhEslX0u+1zG01eb4kQ8V9d2dk= +github.com/cloudquery/plugin-pb-go v1.2.1/go.mod h1:NbWAtT2BzJQ9+XUWwh3IKBg3MOeV9ZEpHoHNAQ/YDV8= github.com/cloudquery/plugin-sdk/v2 v2.7.0 h1:hRXsdEiaOxJtsn/wZMFQC9/jPfU1MeMK3KF+gPGqm7U= github.com/cloudquery/plugin-sdk/v2 v2.7.0/go.mod h1:pAX6ojIW99b/Vg4CkhnsGkRIzNaVEceYMR+Bdit73ug= github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= diff --git a/scheduler/scheduler_test.go b/scheduler/scheduler_test.go index ee6e55b801..c8a19fb856 100644 --- a/scheduler/scheduler_test.go +++ b/scheduler/scheduler_test.go @@ -238,12 +238,6 @@ func testSyncTable(t *testing.T, tc syncTestCase, strategy Strategy, determinist var i int for msg := range msgs { - if tc.data == nil { - t.Fatalf("Unexpected message %v", msg) - } - if i >= len(tc.data) { - t.Fatalf("expected %d resources. got %d", len(tc.data), i) - } switch v := msg.(type) { case *message.Insert: record := v.Record @@ -252,8 +246,10 @@ func testSyncTable(t *testing.T, tc syncTestCase, strategy Strategy, determinist t.Fatalf("expected at i=%d: %v. got %v", i, tc.data[i], record) } i++ + case *message.MigrateTable: + // ignore default: - t.Fatalf("expected insert message. got %v", msg) + t.Fatalf("expected insert message. got %T", msg) } } if len(tc.data) != i { From 338e57ae0d9df0e7b887a52b254200134754db00 Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Fri, 23 Jun 2023 13:55:55 +0300 Subject: [PATCH 100/125] rebase complete --- go.mod | 1 + go.sum | 14 +++++--------- plugin/diff.go | 2 +- plugin/nulls.go | 2 +- plugin/testing_sync.go | 33 --------------------------------- scalar/scalar.go | 2 ++ scalar/string.go | 3 +++ scheduler/scheduler_test.go | 18 +++++++++--------- schema/testdata.go | 1 - 9 files changed, 22 insertions(+), 54 deletions(-) delete mode 100644 plugin/testing_sync.go diff --git a/go.mod b/go.mod index f6d8df335b..4f0776f201 100644 --- a/go.mod +++ b/go.mod @@ -36,6 +36,7 @@ require ( github.com/golang/snappy v0.0.4 // indirect github.com/google/flatbuffers v23.1.21+incompatible // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect + github.com/klauspost/asmfmt v1.3.2 // indirect github.com/klauspost/compress v1.16.0 // indirect github.com/klauspost/cpuid/v2 v2.2.3 // indirect github.com/mattn/go-colorable v0.1.13 // indirect diff --git a/go.sum b/go.sum index f3e63c76e2..37d20fb479 100644 --- a/go.sum +++ b/go.sum @@ -89,6 +89,7 @@ github.com/golang/mock v1.4.0/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt github.com/golang/mock v1.4.1/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw= github.com/golang/mock v1.4.3/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw= github.com/golang/mock v1.4.4/go.mod h1:l3mdAwkq5BuhzHwde/uurv3sEJeZMXNpwsxVWU71h+4= +github.com/golang/mock v1.5.0/go.mod h1:CWnOUgYIOo4TcNZ0wHX3YZCqsaM1I1Jvs6v3mP3KVu8= github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= @@ -106,6 +107,8 @@ github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaS github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg= github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= +github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM= +github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/flatbuffers v23.1.21+incompatible h1:bUqzx/MXCDxuS0hRJL2EfjyZL3uQrPbMocUa8zGqsTA= @@ -151,6 +154,8 @@ github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/X github.com/k0kubun/go-ansi v0.0.0-20180517002512-3bf9e2903213/go.mod h1:vNUNkEQ1e29fT/6vq2aBdFsgNPmy8qMdSay1npru+Sw= github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/klauspost/asmfmt v1.3.2 h1:4Ri7ox3EwapiOjCki+hw14RyKk201CN4rzyCJRFLpK4= +github.com/klauspost/asmfmt v1.3.2/go.mod h1:AG8TuvYojzulgDAMCnYn50l/5QV3Bs/tp6j0HLHbNSE= github.com/klauspost/compress v1.16.0 h1:iULayQNOReoYUe+1qtKOqw9CwJv3aNQu8ivo7lw1HU4= github.com/klauspost/compress v1.16.0/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE= github.com/klauspost/cpuid/v2 v2.2.3 h1:sxCkb+qR91z4vsqw4vGGZlDgPz3G7gjaLyK3V8y70BU= @@ -168,8 +173,6 @@ github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/ github.com/mattn/go-isatty v0.0.17/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= github.com/mattn/go-isatty v0.0.18 h1:DOKFKCQ7FNG2L1rbrmstDN4QVRdS89Nkh85u68Uwp98= github.com/mattn/go-isatty v0.0.18/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= -<<<<<<< HEAD -======= github.com/mattn/go-runewidth v0.0.14 h1:+xnbZSEeDbOIg5/mE6JF0w6n9duR1l3/WmbinWVwUuU= github.com/mattn/go-runewidth v0.0.14/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 h1:AMFGa4R4MiIpspGNG7Z948v4n35fFGB3RR3G/ry4FWs= @@ -178,7 +181,6 @@ github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3 h1:+n/aFZefKZp7spd8D github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3/go.mod h1:RagcQ7I8IeTMnF8JTXieKnO4Z6JCsikNEzj0DwauVzE= github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db h1:62I3jR2EmQ4l5rM/4FEfDWcRD+abF5XlKShorW5LRoQ= github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db/go.mod h1:l0dey0ia/Uv7NcFFVbCLtqEBQbrT4OCwCSKTEv6enCw= ->>>>>>> 446b805 (wip) github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o= github.com/pierrec/lz4/v4 v4.1.17 h1:kV4Ip+/hUBC+8T6+2EgburRtkE9ef4nbY3f4dFhGjMc= github.com/pierrec/lz4/v4 v4.1.17/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= @@ -206,22 +208,16 @@ github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -<<<<<<< HEAD -======= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= ->>>>>>> 446b805 (wip) github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -<<<<<<< HEAD -======= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= ->>>>>>> c0b7ea6 (commit fixes) github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/thoas/go-funk v0.9.3 h1:7+nAEx3kn5ZJcnDm2Bh23N2yOtweO14bi//dvRtgLpw= diff --git a/plugin/diff.go b/plugin/diff.go index dc3c555ce0..343de8d19b 100644 --- a/plugin/diff.go +++ b/plugin/diff.go @@ -1,4 +1,4 @@ -package destination +package plugin import ( "fmt" diff --git a/plugin/nulls.go b/plugin/nulls.go index 02d80a5f1c..e60a88a24f 100644 --- a/plugin/nulls.go +++ b/plugin/nulls.go @@ -69,4 +69,4 @@ func (f AllowNullFunc) replaceNullsByEmpty(records []arrow.Record) { } records[i] = array.NewRecord(records[i].Schema(), cols, records[i].NumRows()) } -} \ No newline at end of file +} diff --git a/plugin/testing_sync.go b/plugin/testing_sync.go deleted file mode 100644 index edf045f055..0000000000 --- a/plugin/testing_sync.go +++ /dev/null @@ -1,33 +0,0 @@ -package plugin - -import ( - "fmt" - "strings" - - "github.com/apache/arrow/go/v13/arrow" - "github.com/apache/arrow/go/v13/arrow/array" -) - -func RecordDiff(l arrow.Record, r arrow.Record) string { - var sb strings.Builder - if l.NumCols() != r.NumCols() { - return fmt.Sprintf("different number of columns: %d vs %d", l.NumCols(), r.NumCols()) - } - if l.NumRows() != r.NumRows() { - return fmt.Sprintf("different number of rows: %d vs %d", l.NumRows(), r.NumRows()) - } - for i := 0; i < int(l.NumCols()); i++ { - edits, err := array.Diff(l.Column(i), r.Column(i)) - if err != nil { - panic(fmt.Sprintf("left: %v, right: %v, error: %v", l.Column(i).DataType(), r.Column(i).DataType(), err)) - } - diff := edits.UnifiedDiff(l.Column(i), r.Column(i)) - if diff != "" { - sb.WriteString(l.Schema().Field(i).Name) - sb.WriteString(": ") - sb.WriteString(diff) - sb.WriteString("\n") - } - } - return sb.String() -} diff --git a/scalar/scalar.go b/scalar/scalar.go index d80c1a2e5e..7236cd7109 100644 --- a/scalar/scalar.go +++ b/scalar/scalar.go @@ -5,8 +5,10 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/array" + "github.com/apache/arrow/go/v13/arrow/float16" "github.com/apache/arrow/go/v13/arrow/memory" "github.com/cloudquery/plugin-sdk/v4/types" + "golang.org/x/exp/maps" ) // Scalar represents a single value of a specific DataType as opposed to diff --git a/scalar/string.go b/scalar/string.go index 0d191d844e..7997aded97 100644 --- a/scalar/string.go +++ b/scalar/string.go @@ -4,8 +4,11 @@ import ( "fmt" "github.com/apache/arrow/go/v13/arrow" + "github.com/apache/arrow/go/v13/arrow/array" ) +const nullValueStr = array.NullValueStr + type String struct { Valid bool Value string diff --git a/scheduler/scheduler_test.go b/scheduler/scheduler_test.go index c8a19fb856..1fe5bc57ea 100644 --- a/scheduler/scheduler_test.go +++ b/scheduler/scheduler_test.go @@ -139,7 +139,7 @@ var syncTestCases = []syncTestCase{ table: testTableSuccess(), data: []scalar.Vector{ { - &scalar.Int64{Value: 3, Valid: true}, + &scalar.Int{Value: 3, Valid: true}, }, }, }, @@ -156,10 +156,10 @@ var syncTestCases = []syncTestCase{ table: testTableRelationSuccess(), data: []scalar.Vector{ { - &scalar.Int64{Value: 3, Valid: true}, + &scalar.Int{Value: 3, Valid: true}, }, { - &scalar.Int64{Value: 3, Valid: true}, + &scalar.Int{Value: 3, Valid: true}, }, }, }, @@ -167,7 +167,7 @@ var syncTestCases = []syncTestCase{ table: testTableSuccess(), data: []scalar.Vector{ { - &scalar.Int64{Value: 3, Valid: true}, + &scalar.Int{Value: 3, Valid: true}, }, }, deterministicCQID: true, @@ -176,8 +176,8 @@ var syncTestCases = []syncTestCase{ table: testTableColumnResolverPanic(), data: []scalar.Vector{ { - &scalar.Int64{Value: 3, Valid: true}, - &scalar.Int64{}, + &scalar.Int{Value: 3, Valid: true}, + &scalar.Int{}, }, }, // deterministicCQID: true, @@ -186,10 +186,10 @@ var syncTestCases = []syncTestCase{ table: testTableRelationSuccess(), data: []scalar.Vector{ { - &scalar.Int64{Value: 3, Valid: true}, + &scalar.Int{Value: 3, Valid: true}, }, { - &scalar.Int64{Value: 3, Valid: true}, + &scalar.Int{Value: 3, Valid: true}, }, }, // deterministicCQID: true, @@ -198,7 +198,7 @@ var syncTestCases = []syncTestCase{ table: testTableSuccessWithPK(), data: []scalar.Vector{ { - &scalar.Int64{Value: 3, Valid: true}, + &scalar.Int{Value: 3, Valid: true}, }, }, // deterministicCQID: true, diff --git a/schema/testdata.go b/schema/testdata.go index c592ddc40a..af79a95f5e 100644 --- a/schema/testdata.go +++ b/schema/testdata.go @@ -21,7 +21,6 @@ import ( // TestSourceOptions controls which types are included by TestSourceColumns. type TestSourceOptions struct { SkipDates bool - SkipDecimals bool SkipDurations bool SkipIntervals bool SkipLargeTypes bool // e.g. large binary, large string From b556091f62156a3f17982593f62f614ca6fb34c4 Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Fri, 23 Jun 2023 14:29:49 +0300 Subject: [PATCH 101/125] remove cover --- .gitignore | 3 +- cover | 199 ----------------------------------------------------- 2 files changed, 2 insertions(+), 200 deletions(-) delete mode 100644 cover diff --git a/.gitignore b/.gitignore index 605ca47ead..2d30804e99 100644 --- a/.gitignore +++ b/.gitignore @@ -9,4 +9,5 @@ vendor cover.out .delta.* bench.json -serve/^TestPluginDocs$/ \ No newline at end of file +serve/^TestPluginDocs$/ +cover diff --git a/cover b/cover deleted file mode 100644 index 5fb4e3be13..0000000000 --- a/cover +++ /dev/null @@ -1,199 +0,0 @@ -mode: set -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:45.37,47.30 2 0 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:53.2,53.24 1 0 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:47.30,48.12 1 0 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:51.3,51.41 1 0 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:48.12,50.4 1 0 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:56.44,58.2 1 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:62.47,63.28 1 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:63.28,65.3 1 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:68.59,69.28 1 0 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:69.28,71.3 1 0 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:74.49,75.28 1 0 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:75.28,77.3 1 0 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:80.63,81.28 1 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:81.28,83.3 1 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:104.94,113.27 2 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:116.2,116.11 1 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:113.27,115.3 1 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:119.78,121.12 2 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:132.2,132.34 1 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:139.2,139.12 1 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:121.12,123.21 2 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:124.21,125.29 1 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:126.28,127.36 1 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:128.11,129.57 1 0 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:132.34,138.3 5 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:142.86,144.31 2 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:144.31,148.3 3 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:151.157,160.15 9 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:171.2,171.38 1 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:185.2,185.34 1 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:189.2,189.39 1 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:201.2,202.17 2 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:160.15,161.35 1 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:161.35,165.47 4 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:165.47,168.5 2 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:171.38,172.74 1 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:172.74,175.38 3 0 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:181.4,181.14 1 0 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:175.38,176.48 1 0 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:176.48,179.6 2 0 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:185.34,187.3 1 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:189.39,190.75 1 0 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:190.75,193.38 3 0 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:193.38,194.48 1 0 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:194.48,197.6 2 0 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:205.183,208.15 3 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:221.2,221.23 1 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:208.15,209.35 1 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:209.35,213.47 4 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:213.47,217.5 3 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:221.23,222.62 1 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:222.62,225.38 3 0 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:225.38,226.48 1 0 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:226.48,230.6 3 0 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:233.8,236.15 2 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:236.15,238.18 2 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:238.18,241.39 3 0 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:241.39,242.49 1 0 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:242.49,246.7 3 0 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:253.44,255.22 2 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:258.2,258.31 1 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:264.2,264.14 1 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:255.22,257.3 1 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:258.31,260.23 2 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:260.23,262.4 1 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:269.30,270.11 1 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:273.2,273.10 1 0 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler.go:270.11,272.3 1 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:17.93,24.42 4 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:29.2,34.33 3 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:58.2,59.33 2 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:81.2,81.11 1 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:24.42,28.3 2 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:34.33,36.29 2 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:40.3,41.29 2 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:52.3,55.44 2 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:36.29,38.4 1 0 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:41.29,42.41 1 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:42.41,44.5 1 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:44.10,45.48 1 0 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:49.5,49.110 1 0 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:45.48,48.6 2 0 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:59.33,62.34 3 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:62.34,64.57 2 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:69.4,70.14 2 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:64.57,68.5 2 0 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:70.14,76.5 3 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:84.184,89.19 4 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:92.2,95.12 3 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:121.2,121.21 1 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:126.2,126.19 1 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:89.19,91.3 1 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:95.12,96.16 1 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:108.3,108.66 1 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:96.16,97.36 1 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:106.4,106.14 1 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:97.36,99.48 2 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:103.5,104.46 2 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:99.48,102.6 2 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:108.66,111.38 3 0 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:117.4,117.10 1 0 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:111.38,112.48 1 0 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:112.48,115.6 2 0 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:121.21,123.3 1 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:126.19,129.3 2 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:132.203,134.30 2 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:137.2,138.12 2 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:194.2,195.38 2 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:213.2,213.11 1 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:134.30,136.3 1 0 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:138.12,142.33 4 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:191.3,191.12 1 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:142.33,144.56 2 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:150.4,151.14 2 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:144.56,149.5 3 0 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:151.14,156.32 4 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:160.5,160.79 1 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:174.5,174.55 1 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:188.5,188.38 1 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:156.32,158.6 1 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:160.79,163.86 3 0 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:171.6,172.12 2 0 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:163.86,166.50 1 0 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:166.50,169.8 2 0 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:174.55,177.86 3 0 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:185.6,186.12 2 0 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:177.86,180.50 1 0 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:180.50,183.8 2 0 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:195.38,198.53 3 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:198.53,200.61 2 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:205.4,206.14 2 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:200.61,204.5 2 0 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_dfs.go:206.14,210.5 3 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_round_robin.go:16.100,21.42 4 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_round_robin.go:26.2,31.33 3 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_round_robin.go:42.2,45.34 3 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_round_robin.go:66.2,66.11 1 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_round_robin.go:21.42,25.3 2 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_round_robin.go:31.33,33.29 2 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_round_robin.go:36.3,39.44 2 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_round_robin.go:33.29,35.4 1 0 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_round_robin.go:45.34,48.56 3 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_round_robin.go:53.3,54.13 2 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_round_robin.go:48.56,52.4 2 0 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_round_robin.go:54.13,62.4 3 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_round_robin.go:71.108,74.6 3 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_round_robin.go:87.2,87.21 1 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_round_robin.go:74.6,76.32 2 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_round_robin.go:82.3,83.16 2 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_round_robin.go:76.32,77.41 1 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_round_robin.go:77.41,80.5 2 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/scheduler_round_robin.go:83.16,84.9 1 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:23.68,25.2 1 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:28.46,29.48 1 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:42.2,42.52 1 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:55.2,55.13 1 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:29.48,30.42 1 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:30.42,31.46 1 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:34.4,34.54 1 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:37.4,37.54 1 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:31.46,33.5 1 0 -github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:34.54,36.5 1 0 -github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:37.54,39.5 1 0 -github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:42.52,43.42 1 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:43.42,44.42 1 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:47.4,47.50 1 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:50.4,50.50 1 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:44.42,46.5 1 0 -github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:47.50,49.5 1 0 -github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:50.50,52.5 1 0 -github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:58.85,60.33 2 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:63.2,63.43 1 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:60.33,62.3 1 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:63.43,65.3 1 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:68.40,70.46 2 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:75.2,75.14 1 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:70.46,71.41 1 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:71.41,73.4 1 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:78.46,80.46 2 0 -github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:85.2,85.14 1 0 -github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:80.46,81.41 1 0 -github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:81.41,83.4 1 0 -github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:88.40,90.46 2 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:95.2,95.14 1 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:90.46,91.41 1 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:91.41,93.4 1 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:98.46,100.46 2 0 -github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:105.2,105.14 1 0 -github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:100.46,101.41 1 0 -github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:101.41,103.4 1 0 -github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:108.43,110.46 2 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:115.2,115.14 1 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:110.46,111.41 1 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:111.41,113.4 1 1 -github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:118.49,120.46 2 0 -github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:125.2,125.14 1 0 -github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:120.46,121.41 1 0 -github.com/cloudquery/plugin-sdk/v4/scheduler/metrics.go:121.41,123.4 1 0 From 919770a63f9999ade13dca342a13fb6edd239e61 Mon Sep 17 00:00:00 2001 From: Herman Schaaf Date: Fri, 23 Jun 2023 14:06:15 +0100 Subject: [PATCH 102/125] Converting between bytes is the responsibility of plugin-pb-go --- internal/servers/destination/v1/convert.go | 32 ++++++++ .../servers/destination/v1/destinations.go | 6 +- internal/servers/plugin/v3/plugin.go | 36 ++++++--- schema/arrow.go | 79 ------------------- schema/table.go | 8 -- 5 files changed, 60 insertions(+), 101 deletions(-) create mode 100644 internal/servers/destination/v1/convert.go diff --git a/internal/servers/destination/v1/convert.go b/internal/servers/destination/v1/convert.go new file mode 100644 index 0000000000..7fc57f2f01 --- /dev/null +++ b/internal/servers/destination/v1/convert.go @@ -0,0 +1,32 @@ +package destination + +import ( + "bytes" + + "github.com/apache/arrow/go/v13/arrow" + "github.com/apache/arrow/go/v13/arrow/ipc" + "github.com/cloudquery/plugin-sdk/v4/schema" +) + +// Legacy conversion functions to and from Arrow bytes. From plugin v3 onwards +// this responsibility is handled by plugin-pb-go. + +func NewFromBytes(b []byte) (*arrow.Schema, error) { + rdr, err := ipc.NewReader(bytes.NewReader(b)) + if err != nil { + return nil, err + } + return rdr.Schema(), nil +} + +func NewSchemasFromBytes(b [][]byte) (schema.Schemas, error) { + var err error + ret := make([]*arrow.Schema, len(b)) + for i, buf := range b { + ret[i], err = NewFromBytes(buf) + if err != nil { + return nil, err + } + } + return ret, nil +} diff --git a/internal/servers/destination/v1/destinations.go b/internal/servers/destination/v1/destinations.go index 99c5a653c5..2180dff859 100644 --- a/internal/servers/destination/v1/destinations.go +++ b/internal/servers/destination/v1/destinations.go @@ -55,7 +55,7 @@ func (s *Server) GetVersion(context.Context, *pb.GetVersion_Request) (*pb.GetVer } func (s *Server) Migrate(ctx context.Context, req *pb.Migrate_Request) (*pb.Migrate_Response, error) { - schemas, err := schema.NewSchemasFromBytes(req.Tables) + schemas, err := NewSchemasFromBytes(req.Tables) if err != nil { return nil, status.Errorf(codes.InvalidArgument, "failed to create schemas: %v", err) } @@ -97,7 +97,7 @@ func (s *Server) Write(msg pb.Destination_WriteServer) error { return status.Errorf(codes.Internal, "failed to receive msg: %v", err) } - schemas, err := schema.NewSchemasFromBytes(r.Tables) + schemas, err := NewSchemasFromBytes(r.Tables) if err != nil { return status.Errorf(codes.InvalidArgument, "failed to create schemas: %v", err) } @@ -199,7 +199,7 @@ func (s *Server) GetMetrics(context.Context, *pb.GetDestinationMetrics_Request) } func (s *Server) DeleteStale(ctx context.Context, req *pb.DeleteStale_Request) (*pb.DeleteStale_Response, error) { - schemas, err := schema.NewSchemasFromBytes(req.Tables) + schemas, err := NewSchemasFromBytes(req.Tables) if err != nil { return nil, status.Errorf(codes.InvalidArgument, "failed to create schemas: %v", err) } diff --git a/internal/servers/plugin/v3/plugin.go b/internal/servers/plugin/v3/plugin.go index bc902f2272..874dc6507d 100644 --- a/internal/servers/plugin/v3/plugin.go +++ b/internal/servers/plugin/v3/plugin.go @@ -36,9 +36,13 @@ func (s *Server) GetTables(ctx context.Context, _ *pb.GetTables_Request) (*pb.Ge if err != nil { return nil, status.Errorf(codes.Internal, "failed to get tables: %v", err) } - encoded, err := tables.ToArrowSchemas().Encode() - if err != nil { - return nil, fmt.Errorf("failed to encode tables: %w", err) + schemas := tables.ToArrowSchemas() + encoded := make([][]byte, len(schemas)) + for i, schema := range schemas { + encoded[i], err = pb.SchemaToBytes(schema) + if err != nil { + return nil, status.Errorf(codes.Internal, "failed to encode tables: %v", err) + } } return &pb.GetTables_Response{ Tables: encoded, @@ -105,12 +109,12 @@ func (s *Server) Sync(req *pb.Sync_Request, stream pb.Plugin_SyncServer) error { } }() - pbMsg := &pb.Sync_Response{} for msg := range msgs { + pbMsg := &pb.Sync_Response{} switch m := msg.(type) { case *message.MigrateTable: tableSchema := m.Table.ToArrowSchema() - schemaBytes, err := schema.ToBytes(tableSchema) + schemaBytes, err := pb.SchemaToBytes(tableSchema) if err != nil { return status.Errorf(codes.Internal, "failed to encode table schema: %v", err) } @@ -121,7 +125,7 @@ func (s *Server) Sync(req *pb.Sync_Request, stream pb.Plugin_SyncServer) error { } case *message.Insert: - recordBytes, err := schema.RecordToBytes(m.Record) + recordBytes, err := pb.RecordToBytes(m.Record) if err != nil { return status.Errorf(codes.Internal, "failed to encode record: %v", err) } @@ -200,16 +204,21 @@ func (s *Server) Write(msg pb.Plugin_WriteServer) error { var pbMsgConvertErr error switch pbMsg := r.Message.(type) { case *pb.Write_Request_MigrateTable: - table, err := schema.NewTableFromBytes(pbMsg.MigrateTable.Table) + sc, err := pb.NewSchemaFromBytes(pbMsg.MigrateTable.Table) + if err != nil { + pbMsgConvertErr = status.Errorf(codes.InvalidArgument, "failed to create schema from bytes: %v", err) + break + } + table, err := schema.NewTableFromArrowSchema(sc) if err != nil { - pbMsgConvertErr = status.Errorf(codes.InvalidArgument, "failed to create table: %v", err) + pbMsgConvertErr = status.Errorf(codes.InvalidArgument, "failed to create table from schema: %v", err) break } pluginMessage = &message.MigrateTable{ Table: table, } case *pb.Write_Request_Insert: - record, err := schema.NewRecordFromBytes(pbMsg.Insert.Record) + record, err := pb.NewRecordFromBytes(pbMsg.Insert.Record) if err != nil { pbMsgConvertErr = status.Errorf(codes.InvalidArgument, "failed to create record: %v", err) break @@ -218,9 +227,14 @@ func (s *Server) Write(msg pb.Plugin_WriteServer) error { Record: record, } case *pb.Write_Request_Delete: - table, err := schema.NewTableFromBytes(pbMsg.Delete.Table) + sc, err := pb.NewSchemaFromBytes(pbMsg.Delete.Table) if err != nil { - pbMsgConvertErr = status.Errorf(codes.InvalidArgument, "failed to create record: %v", err) + pbMsgConvertErr = status.Errorf(codes.InvalidArgument, "failed to create schema from bytes: %v", err) + break + } + table, err := schema.NewTableFromArrowSchema(sc) + if err != nil { + pbMsgConvertErr = status.Errorf(codes.InvalidArgument, "failed to create table from schema: %v", err) break } pluginMessage = &message.DeleteStale{ diff --git a/schema/arrow.go b/schema/arrow.go index d3d5e9639e..56e51de354 100644 --- a/schema/arrow.go +++ b/schema/arrow.go @@ -1,11 +1,7 @@ package schema import ( - "bytes" - "fmt" - "github.com/apache/arrow/go/v13/arrow" - "github.com/apache/arrow/go/v13/arrow/ipc" ) const ( @@ -38,78 +34,3 @@ func (s Schemas) SchemaByName(name string) *arrow.Schema { } return nil } - -func (s Schemas) Encode() ([][]byte, error) { - ret := make([][]byte, len(s)) - for i, sc := range s { - var buf bytes.Buffer - wr := ipc.NewWriter(&buf, ipc.WithSchema(sc)) - if err := wr.Close(); err != nil { - return nil, err - } - ret[i] = buf.Bytes() - } - return ret, nil -} - -func ToBytes(schema *arrow.Schema) ([]byte, error) { - var buf bytes.Buffer - wr := ipc.NewWriter(&buf, ipc.WithSchema(schema)) - if err := wr.Close(); err != nil { - return nil, err - } - return buf.Bytes(), nil -} - -func RecordToBytes(record arrow.Record) ([]byte, error) { - var buf bytes.Buffer - wr := ipc.NewWriter(&buf, ipc.WithSchema(record.Schema())) - if err := wr.Write(record); err != nil { - return nil, err - } - if err := wr.Close(); err != nil { - return nil, err - } - return buf.Bytes(), nil -} - -func NewRecordFromBytes(b []byte) (arrow.Record, error) { - rdr, err := ipc.NewReader(bytes.NewReader(b)) - if err != nil { - return nil, err - } - for rdr.Next() { - rec := rdr.Record() - rec.Retain() - return rec, nil - } - return nil, nil -} - -func NewFromBytes(b []byte) (*arrow.Schema, error) { - rdr, err := ipc.NewReader(bytes.NewReader(b)) - if err != nil { - return nil, err - } - return rdr.Schema(), nil -} - -func NewSchemasFromBytes(b [][]byte) (Schemas, error) { - var err error - ret := make([]*arrow.Schema, len(b)) - for i, buf := range b { - ret[i], err = NewFromBytes(buf) - if err != nil { - return nil, err - } - } - return ret, nil -} - -func NewTablesFromBytes(b [][]byte) (Tables, error) { - schemas, err := NewSchemasFromBytes(b) - if err != nil { - return nil, fmt.Errorf("failed to decode schemas: %w", err) - } - return NewTablesFromArrowSchemas(schemas) -} diff --git a/schema/table.go b/schema/table.go index b36c02b6a7..a342ff1ee0 100644 --- a/schema/table.go +++ b/schema/table.go @@ -108,14 +108,6 @@ func NewTablesFromArrowSchemas(schemas []*arrow.Schema) (Tables, error) { return tables, nil } -func NewTableFromBytes(b []byte) (*Table, error) { - sc, err := NewFromBytes(b) - if err != nil { - return nil, err - } - return NewTableFromArrowSchema(sc) -} - // Create a CloudQuery Table abstraction from an arrow schema // arrow schema is a low level representation of a table that can be sent // over the wire in a cross-language way From 9c050a1b8ee72fb1a29c1adf7dab9fe462338c0c Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Fri, 23 Jun 2023 18:15:01 +0300 Subject: [PATCH 103/125] fix tests --- go.mod | 2 +- go.sum | 4 +- schema/arrow_test.go | 92 ------------------------------------ serve/destination_v1_test.go | 4 +- serve/plugin_test.go | 10 ++-- 5 files changed, 13 insertions(+), 99 deletions(-) diff --git a/go.mod b/go.mod index 4f0776f201..c262d9a980 100644 --- a/go.mod +++ b/go.mod @@ -5,7 +5,7 @@ go 1.19 require ( github.com/apache/arrow/go/v13 v13.0.0-20230622042343-ec413b7763fe github.com/bradleyjkemp/cupaloy/v2 v2.8.0 - github.com/cloudquery/plugin-pb-go v1.2.1 + github.com/cloudquery/plugin-pb-go v1.3.2 github.com/cloudquery/plugin-sdk/v2 v2.7.0 github.com/getsentry/sentry-go v0.20.0 github.com/goccy/go-json v0.10.0 diff --git a/go.sum b/go.sum index 37d20fb479..ed14b152b7 100644 --- a/go.sum +++ b/go.sum @@ -49,8 +49,8 @@ github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMn github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/cloudquery/arrow/go/v13 v13.0.0-20230610001216-0f7bd3beda2c h1:nQSB4v0QxCW5XDLvVBcaNrsJ+J/esMBoFYjymllxM1E= github.com/cloudquery/arrow/go/v13 v13.0.0-20230610001216-0f7bd3beda2c/go.mod h1:W69eByFNO0ZR30q1/7Sr9d83zcVZmF2MiP3fFYAWJOc= -github.com/cloudquery/plugin-pb-go v1.2.1 h1:Ewsg70dkB/f+hzeqKNhEslX0u+1zG01eb4kQ8V9d2dk= -github.com/cloudquery/plugin-pb-go v1.2.1/go.mod h1:NbWAtT2BzJQ9+XUWwh3IKBg3MOeV9ZEpHoHNAQ/YDV8= +github.com/cloudquery/plugin-pb-go v1.3.2 h1:q/REJeRr5zyyNUZMcvE43+X7hV+zjzWqLFlWWOnWbvs= +github.com/cloudquery/plugin-pb-go v1.3.2/go.mod h1:NbWAtT2BzJQ9+XUWwh3IKBg3MOeV9ZEpHoHNAQ/YDV8= github.com/cloudquery/plugin-sdk/v2 v2.7.0 h1:hRXsdEiaOxJtsn/wZMFQC9/jPfU1MeMK3KF+gPGqm7U= github.com/cloudquery/plugin-sdk/v2 v2.7.0/go.mod h1:pAX6ojIW99b/Vg4CkhnsGkRIzNaVEceYMR+Bdit73ug= github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= diff --git a/schema/arrow_test.go b/schema/arrow_test.go index 184161f6dd..8bcf6db8ae 100644 --- a/schema/arrow_test.go +++ b/schema/arrow_test.go @@ -3,103 +3,11 @@ package schema import ( "fmt" "strings" - "testing" "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/array" - "github.com/apache/arrow/go/v13/arrow/memory" ) -func TestSchemaEncode(t *testing.T) { - md := arrow.NewMetadata([]string{"true"}, []string{"false"}) - md1 := arrow.NewMetadata([]string{"false"}, []string{"true"}) - schemas := Schemas{ - arrow.NewSchema( - []arrow.Field{ - {Name: "id", Type: arrow.PrimitiveTypes.Int64}, - {Name: "name", Type: arrow.BinaryTypes.String}, - }, - &md, - ), - arrow.NewSchema( - []arrow.Field{ - {Name: "id", Type: arrow.PrimitiveTypes.Int64}, - {Name: "name", Type: arrow.BinaryTypes.String}, - }, - &md1, - ), - } - b, err := schemas.Encode() - if err != nil { - t.Fatal(err) - } - decodedSchemas, err := NewSchemasFromBytes(b) - if err != nil { - t.Fatal(err) - } - if len(decodedSchemas) != len(schemas) { - t.Fatalf("expected %d schemas, got %d", len(schemas), len(decodedSchemas)) - } - for i := range schemas { - if !schemas[i].Equal(decodedSchemas[i]) { - t.Fatalf("expected schema %d to be %v, got %v", i, schemas[i], decodedSchemas[i]) - } - } -} - -func TestRecordToBytesAndNewRecordFromBytes(t *testing.T) { - md := arrow.NewMetadata([]string{"key"}, []string{"value"}) - schema := arrow.NewSchema( - []arrow.Field{ - {Name: "id", Type: arrow.PrimitiveTypes.Int64}, - {Name: "name", Type: arrow.BinaryTypes.String}, - }, - &md, - ) - bldr := array.NewRecordBuilder(memory.DefaultAllocator, schema) - defer bldr.Release() - bldr.Field(0).AppendValueFromString("1") - bldr.Field(1).AppendValueFromString("foo") - record := bldr.NewRecord() - b, err := RecordToBytes(record) - if err != nil { - t.Fatal(err) - } - decodedRecord, err := NewRecordFromBytes(b) - if err != nil { - t.Fatal(err) - } - numRows := record.NumRows() - if numRows != 1 { - t.Fatalf("expected 1 row, got %d", numRows) - } - if diff := RecordDiff(record, decodedRecord); diff != "" { - t.Fatalf("record differs from expected after NewRecordFromBytes: %v", diff) - } -} - -func TestSchemaToBytesAndNewSchemaFromBytes(t *testing.T) { - md := arrow.NewMetadata([]string{"key"}, []string{"value"}) - schema := arrow.NewSchema( - []arrow.Field{ - {Name: "id", Type: arrow.PrimitiveTypes.Int64}, - {Name: "name", Type: arrow.BinaryTypes.String}, - }, - &md, - ) - b, err := ToBytes(schema) - if err != nil { - t.Fatal(err) - } - decodedSchema, err := NewFromBytes(b) - if err != nil { - t.Fatal(err) - } - if !schema.Equal(decodedSchema) { - t.Fatalf("schema differs from expected after NewSchemaFromBytes. \nBefore: %v,\nAfter: %v", schema, decodedSchema) - } -} - func RecordDiff(l arrow.Record, r arrow.Record) string { var sb strings.Builder if l.NumCols() != r.NumCols() { diff --git a/serve/destination_v1_test.go b/serve/destination_v1_test.go index cafb1f7cdf..e4caa91734 100644 --- a/serve/destination_v1_test.go +++ b/serve/destination_v1_test.go @@ -11,6 +11,7 @@ import ( "github.com/apache/arrow/go/v13/arrow/array" "github.com/apache/arrow/go/v13/arrow/ipc" pb "github.com/cloudquery/plugin-pb-go/pb/destination/v1" + pbSource "github.com/cloudquery/plugin-pb-go/pb/source/v2" "github.com/cloudquery/plugin-pb-go/specs" "github.com/cloudquery/plugin-sdk/v4/internal/memdb" "github.com/cloudquery/plugin-sdk/v4/message" @@ -79,7 +80,8 @@ func TestDestinationV1(t *testing.T) { sourceSpec := specs.Source{ Name: sourceName, } - encodedTables, err := tables.ToArrowSchemas().Encode() + schemas := tables.ToArrowSchemas() + encodedTables, err := pbSource.SchemasToBytes(schemas) if err != nil { t.Fatal(err) } diff --git a/serve/plugin_test.go b/serve/plugin_test.go index 39e48c808b..565f10d0b3 100644 --- a/serve/plugin_test.go +++ b/serve/plugin_test.go @@ -71,8 +71,11 @@ func TestPluginServe(t *testing.T) { if err != nil { t.Fatal(err) } - - tables, err := schema.NewTablesFromBytes(getTablesRes.Tables) + schemas, err := pb.NewSchemasFromBytes(getTablesRes.Tables) + if err != nil { + t.Fatal(err) + } + tables, err := schema.NewTablesFromArrowSchemas(schemas) if err != nil { t.Fatal(err) } @@ -92,7 +95,8 @@ func TestPluginServe(t *testing.T) { bldr := array.NewRecordBuilder(memory.DefaultAllocator, testTable.ToArrowSchema()) bldr.Field(0).(*array.StringBuilder).Append("test") record := bldr.NewRecord() - recordBytes, err := schema.RecordToBytes(record) + + recordBytes, err := pb.RecordToBytes(record) if err != nil { t.Fatal(err) } From 203e13f79ef867671ed06df66127970d28311ba4 Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Fri, 23 Jun 2023 18:15:19 +0300 Subject: [PATCH 104/125] fmt --- serve/plugin_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/serve/plugin_test.go b/serve/plugin_test.go index 565f10d0b3..ad91d862f6 100644 --- a/serve/plugin_test.go +++ b/serve/plugin_test.go @@ -95,7 +95,7 @@ func TestPluginServe(t *testing.T) { bldr := array.NewRecordBuilder(memory.DefaultAllocator, testTable.ToArrowSchema()) bldr.Field(0).(*array.StringBuilder).Append("test") record := bldr.NewRecord() - + recordBytes, err := pb.RecordToBytes(record) if err != nil { t.Fatal(err) From a55b2abbf41a2e1a585cd4546fcf5299652d4940 Mon Sep 17 00:00:00 2001 From: Kemal Hadimli Date: Fri, 23 Jun 2023 16:59:01 +0100 Subject: [PATCH 105/125] Remove upsert --- internal/memdb/memdb.go | 22 ++++---- .../servers/destination/v0/destinations.go | 1 - .../servers/destination/v1/destinations.go | 1 - internal/servers/discovery/v1/discovery.go | 6 +-- message/message.go | 1 - plugin/testing_upsert.go | 2 - plugin/testing_write_insert.go | 1 - writers/batch.go | 53 +++++++------------ writers/batch_test.go | 35 ++++++------ writers/mixed_batch_test.go | 1 - 10 files changed, 46 insertions(+), 77 deletions(-) diff --git a/internal/memdb/memdb.go b/internal/memdb/memdb.go index 42d2f89c16..571387bb49 100644 --- a/internal/memdb/memdb.go +++ b/internal/memdb/memdb.go @@ -43,6 +43,7 @@ func GetNewClient(options ...Option) plugin.NewClientFunc { c := &client{ memoryDB: make(map[string][]arrow.Record), memoryDBLock: sync.RWMutex{}, + tables: make(map[string]*schema.Table), } for _, opt := range options { opt(c) @@ -52,11 +53,8 @@ func GetNewClient(options ...Option) plugin.NewClientFunc { } } -func NewMemDBClient(_ context.Context, _ zerolog.Logger, spec []byte) (plugin.Client, error) { - return &client{ - memoryDB: make(map[string][]arrow.Record), - tables: make(map[string]*schema.Table), - }, nil +func NewMemDBClient(ctx context.Context, l zerolog.Logger, spec []byte) (plugin.Client, error) { + return GetNewClient()(ctx, l, spec) } func NewMemDBClientErrOnNew(context.Context, zerolog.Logger, any) (plugin.Client, error) { @@ -64,8 +62,13 @@ func NewMemDBClientErrOnNew(context.Context, zerolog.Logger, any) (plugin.Client } func (c *client) overwrite(table *schema.Table, data arrow.Record) { - pksIndex := table.PrimaryKeysIndexes() tableName := table.Name + pksIndex := table.PrimaryKeysIndexes() + if len(pksIndex) == 0 { + c.memoryDB[tableName] = append(c.memoryDB[tableName], data) + return + } + for i, row := range c.memoryDB[tableName] { found := true for _, pkIndex := range pksIndex { @@ -113,7 +116,6 @@ func (c *client) Sync(ctx context.Context, options plugin.SyncOptions, res chan< for _, row := range c.memoryDB[tableName] { res <- &message.Insert{ Record: row, - Upsert: false, } } } @@ -174,11 +176,7 @@ func (c *client) Write(ctx context.Context, options plugin.WriteOptions, msgs <- return fmt.Errorf("table name not found in schema metadata") } table := c.tables[tableName] - if msg.Upsert { - c.overwrite(table, msg.Record) - } else { - c.memoryDB[tableName] = append(c.memoryDB[tableName], msg.Record) - } + c.overwrite(table, msg.Record) } c.memoryDBLock.Unlock() diff --git a/internal/servers/destination/v0/destinations.go b/internal/servers/destination/v0/destinations.go index 8b97defa3a..dfc7f571d1 100644 --- a/internal/servers/destination/v0/destinations.go +++ b/internal/servers/destination/v0/destinations.go @@ -182,7 +182,6 @@ func (s *Server) Write2(msg pb.Destination_Write2Server) error { convertedResource := CQTypesToRecord(memory.DefaultAllocator, []schemav2.CQTypes{origResource.Data}, table.ToArrowSchema()) msg := &message.Insert{ Record: convertedResource, - Upsert: s.spec.WriteMode == specs.WriteModeOverwrite || s.spec.WriteMode == specs.WriteModeOverwriteDeleteStale, } select { diff --git a/internal/servers/destination/v1/destinations.go b/internal/servers/destination/v1/destinations.go index 2180dff859..1b0e4d04c0 100644 --- a/internal/servers/destination/v1/destinations.go +++ b/internal/servers/destination/v1/destinations.go @@ -160,7 +160,6 @@ func (s *Server) Write(msg pb.Destination_WriteServer) error { rec.Retain() msg := &message.Insert{ Record: rec, - Upsert: s.spec.WriteMode == specs.WriteModeOverwrite || s.spec.WriteMode == specs.WriteModeOverwriteDeleteStale, } select { case msgs <- msg: diff --git a/internal/servers/discovery/v1/discovery.go b/internal/servers/discovery/v1/discovery.go index fedb964338..896e8a9cea 100644 --- a/internal/servers/discovery/v1/discovery.go +++ b/internal/servers/discovery/v1/discovery.go @@ -12,9 +12,5 @@ type Server struct { } func (s *Server) GetVersions(context.Context, *pb.GetVersions_Request) (*pb.GetVersions_Response, error) { - v := make([]int32, len(s.Versions)) - for i := range s.Versions { - v[i] = int32(s.Versions[i]) - } - return &pb.GetVersions_Response{Versions: v}, nil + return &pb.GetVersions_Response{Versions: s.Versions}, nil } diff --git a/message/message.go b/message/message.go index e70ced4dbd..dd41a2e171 100644 --- a/message/message.go +++ b/message/message.go @@ -21,7 +21,6 @@ func (m MigrateTable) GetTable() *schema.Table { type Insert struct { Record arrow.Record - Upsert bool } func (m *Insert) GetTable() *schema.Table { diff --git a/plugin/testing_upsert.go b/plugin/testing_upsert.go index a32c7d6497..9dfaa83d4b 100644 --- a/plugin/testing_upsert.go +++ b/plugin/testing_upsert.go @@ -32,7 +32,6 @@ func (s *WriterTestSuite) testUpsert(ctx context.Context) error { if err := s.plugin.writeOne(ctx, WriteOptions{}, &message.Insert{ Record: record, - Upsert: true, }); err != nil { return fmt.Errorf("failed to insert record: %w", err) } @@ -48,7 +47,6 @@ func (s *WriterTestSuite) testUpsert(ctx context.Context) error { if err := s.plugin.writeOne(ctx, WriteOptions{}, &message.Insert{ Record: record, - Upsert: true, }); err != nil { return fmt.Errorf("failed to insert record: %w", err) } diff --git a/plugin/testing_write_insert.go b/plugin/testing_write_insert.go index 7dc987a94b..892f7b659a 100644 --- a/plugin/testing_write_insert.go +++ b/plugin/testing_write_insert.go @@ -40,7 +40,6 @@ func (s *WriterTestSuite) testInsert(ctx context.Context) error { if err := s.plugin.writeOne(ctx, WriteOptions{}, &message.Insert{ Record: record, - Upsert: false, }); err != nil { return fmt.Errorf("failed to insert record: %w", err) } diff --git a/writers/batch.go b/writers/batch.go index 67643bc53f..75ed13ed37 100644 --- a/writers/batch.go +++ b/writers/batch.go @@ -27,19 +27,19 @@ const ( type BatchWriterClient interface { MigrateTables(context.Context, []*message.MigrateTable) error - WriteTableBatch(ctx context.Context, name string, upsert bool, msgs []*message.Insert) error + WriteTableBatch(ctx context.Context, name string, msgs []*message.Insert) error DeleteStale(context.Context, []*message.DeleteStale) error } type BatchWriter struct { client BatchWriterClient workers map[string]*worker - workersLock *sync.RWMutex - workersWaitGroup *sync.WaitGroup + workersLock sync.RWMutex + workersWaitGroup sync.WaitGroup - migrateTableLock *sync.Mutex + migrateTableLock sync.Mutex migrateTableMessages []*message.MigrateTable - deleteStaleLock *sync.Mutex + deleteStaleLock sync.Mutex deleteStaleMessages []*message.DeleteStale logger zerolog.Logger @@ -82,16 +82,12 @@ type worker struct { func NewBatchWriter(client BatchWriterClient, opts ...Option) (*BatchWriter, error) { c := &BatchWriter{ - client: client, - workers: make(map[string]*worker), - workersLock: &sync.RWMutex{}, - workersWaitGroup: &sync.WaitGroup{}, - migrateTableLock: &sync.Mutex{}, - deleteStaleLock: &sync.Mutex{}, - logger: zerolog.Nop(), - batchTimeout: defaultBatchTimeoutSeconds * time.Second, - batchSize: defaultBatchSize, - batchSizeBytes: defaultBatchSizeBytes, + client: client, + workers: make(map[string]*worker), + logger: zerolog.Nop(), + batchTimeout: defaultBatchTimeoutSeconds * time.Second, + batchSize: defaultBatchSize, + batchSizeBytes: defaultBatchSizeBytes, } for _, opt := range opts { opt(c) @@ -128,41 +124,32 @@ func (w *BatchWriter) Close(ctx context.Context) error { func (w *BatchWriter) worker(ctx context.Context, tableName string, ch <-chan *message.Insert, flush <-chan chan bool) { sizeBytes := int64(0) resources := make([]*message.Insert, 0) - upsertBatch := false for { select { case r, ok := <-ch: if !ok { if len(resources) > 0 { - w.flush(ctx, tableName, upsertBatch, resources) + w.flush(ctx, tableName, resources) } return } - if upsertBatch != r.Upsert { - w.flush(ctx, tableName, upsertBatch, resources) - resources = make([]*message.Insert, 0) - sizeBytes = 0 - upsertBatch = r.Upsert - resources = append(resources, r) - sizeBytes = util.TotalRecordSize(r.Record) - } else { - resources = append(resources, r) - sizeBytes += util.TotalRecordSize(r.Record) - } + resources = append(resources, r) + sizeBytes += util.TotalRecordSize(r.Record) + if len(resources) >= w.batchSize || sizeBytes+util.TotalRecordSize(r.Record) >= int64(w.batchSizeBytes) { - w.flush(ctx, tableName, upsertBatch, resources) + w.flush(ctx, tableName, resources) resources = make([]*message.Insert, 0) sizeBytes = 0 } case <-time.After(w.batchTimeout): if len(resources) > 0 { - w.flush(ctx, tableName, upsertBatch, resources) + w.flush(ctx, tableName, resources) resources = make([]*message.Insert, 0) sizeBytes = 0 } case done := <-flush: if len(resources) > 0 { - w.flush(ctx, tableName, upsertBatch, resources) + w.flush(ctx, tableName, resources) resources = make([]*message.Insert, 0) sizeBytes = 0 } @@ -174,11 +161,11 @@ func (w *BatchWriter) worker(ctx context.Context, tableName string, ch <-chan *m } } -func (w *BatchWriter) flush(ctx context.Context, tableName string, upsertBatch bool, resources []*message.Insert) { +func (w *BatchWriter) flush(ctx context.Context, tableName string, resources []*message.Insert) { // resources = w.removeDuplicatesByPK(table, resources) start := time.Now() batchSize := len(resources) - if err := w.client.WriteTableBatch(ctx, tableName, upsertBatch, resources); err != nil { + if err := w.client.WriteTableBatch(ctx, tableName, resources); err != nil { w.logger.Err(err).Str("table", tableName).Int("len", batchSize).Dur("duration", time.Since(start)).Msg("failed to write batch") } else { w.logger.Info().Str("table", tableName).Int("len", batchSize).Dur("duration", time.Since(start)).Msg("batch written successfully") diff --git a/writers/batch_test.go b/writers/batch_test.go index e523f1decb..0dd43a974f 100644 --- a/writers/batch_test.go +++ b/writers/batch_test.go @@ -14,7 +14,7 @@ import ( ) type testBatchClient struct { - mutex *sync.Mutex + mutex sync.Mutex migrateTables []*message.MigrateTable inserts []*message.Insert deleteStales []*message.DeleteStale @@ -45,7 +45,7 @@ func (c *testBatchClient) MigrateTables(_ context.Context, msgs []*message.Migra return nil } -func (c *testBatchClient) WriteTableBatch(_ context.Context, _ string, _ bool, msgs []*message.Insert) error { +func (c *testBatchClient) WriteTableBatch(_ context.Context, _ string, msgs []*message.Insert) error { c.mutex.Lock() defer c.mutex.Unlock() c.inserts = append(c.inserts, msgs...) @@ -84,9 +84,7 @@ var batchTestTables = schema.Tables{ func TestBatchFlushDifferentMessages(t *testing.T) { ctx := context.Background() - testClient := &testBatchClient{ - mutex: &sync.Mutex{}, - } + testClient := &testBatchClient{} wr, err := NewBatchWriter(testClient) if err != nil { t.Fatal(err) @@ -128,9 +126,7 @@ func TestBatchFlushDifferentMessages(t *testing.T) { func TestBatchSize(t *testing.T) { ctx := context.Background() - testClient := &testBatchClient{ - mutex: &sync.Mutex{}, - } + testClient := &testBatchClient{} wr, err := NewBatchWriter(testClient, WithBatchSize(2)) if err != nil { t.Fatal(err) @@ -163,9 +159,7 @@ func TestBatchSize(t *testing.T) { func TestBatchTimeout(t *testing.T) { ctx := context.Background() - testClient := &testBatchClient{ - mutex: &sync.Mutex{}, - } + testClient := &testBatchClient{} wr, err := NewBatchWriter(testClient, WithBatchTimeout(time.Second)) if err != nil { t.Fatal(err) @@ -200,18 +194,19 @@ func TestBatchTimeout(t *testing.T) { func TestBatchUpserts(t *testing.T) { ctx := context.Background() - testClient := &testBatchClient{ - mutex: &sync.Mutex{}, - } - wr, err := NewBatchWriter(testClient) + testClient := &testBatchClient{} + wr, err := NewBatchWriter(testClient, WithBatchSize(2), WithBatchTimeout(time.Second)) if err != nil { t.Fatal(err) } - table := schema.Table{Name: "table1", Columns: []schema.Column{{Name: "id", Type: arrow.PrimitiveTypes.Int64}}} - record := array.NewRecord(table.ToArrowSchema(), nil, 0) + table := schema.Table{Name: "table1", Columns: []schema.Column{{Name: "id", Type: arrow.PrimitiveTypes.Int64, PrimaryKey: true}}} + + bldr := array.NewRecordBuilder(memory.DefaultAllocator, table.ToArrowSchema()) + bldr.Field(0).(*array.Int64Builder).Append(1) + record := bldr.NewRecord() + if err := wr.writeAll(ctx, []message.Message{&message.Insert{ Record: record, - Upsert: true, }}); err != nil { t.Fatal(err) } @@ -228,7 +223,7 @@ func TestBatchUpserts(t *testing.T) { // we need to wait for the batch to be flushed time.Sleep(time.Second * 2) - if testClient.InsertsLen() != 1 { - t.Fatalf("expected 1 insert messages, got %d", testClient.InsertsLen()) + if testClient.InsertsLen() != 2 { + t.Fatalf("expected 2 insert messages, got %d", testClient.InsertsLen()) } } diff --git a/writers/mixed_batch_test.go b/writers/mixed_batch_test.go index 1ad37cc02f..6f5a23245c 100644 --- a/writers/mixed_batch_test.go +++ b/writers/mixed_batch_test.go @@ -91,7 +91,6 @@ func TestMixedBatchWriter(t *testing.T) { rec2 := bldr2.NewRecord() msgInsertTable2 := &message.Insert{ Record: rec2, - Upsert: false, } // message to delete stale from table1 From cb3a6ee2c1fea3d51bac66e9ee2408653857f4aa Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Fri, 23 Jun 2023 18:59:30 +0300 Subject: [PATCH 106/125] fix some lints --- .../servers/destination/v0/destinations.go | 11 ++------ plugin/plugin_test.go | 3 --- plugin/validate.go | 27 ------------------- serve/plugin.go | 6 ++++- 4 files changed, 7 insertions(+), 40 deletions(-) delete mode 100644 plugin/validate.go diff --git a/internal/servers/destination/v0/destinations.go b/internal/servers/destination/v0/destinations.go index dfc7f571d1..6532d89440 100644 --- a/internal/servers/destination/v0/destinations.go +++ b/internal/servers/destination/v0/destinations.go @@ -220,15 +220,8 @@ func SetDestinationManagedCqColumns(tables []*schema.Table) { } } -func (s *Server) GetMetrics(context.Context, *pb.GetDestinationMetrics_Request) (*pb.GetDestinationMetrics_Response, error) { - // stats := s.Plugin.Metrics() - // b, err := json.Marshal(stats) - // if err != nil { - // return nil, fmt.Errorf("failed to marshal stats: %w", err) - // } - return &pb.GetDestinationMetrics_Response{ - // Metrics: b, - }, nil +func (*Server) GetMetrics(context.Context, *pb.GetDestinationMetrics_Request) (*pb.GetDestinationMetrics_Response, error) { + return nil, status.Errorf(codes.Unimplemented, "method GetMetrics is deprecated. Please update CLI") } func (s *Server) DeleteStale(ctx context.Context, req *pb.DeleteStale_Request) (*pb.DeleteStale_Response, error) { diff --git a/plugin/plugin_test.go b/plugin/plugin_test.go index d6fdfcba79..3290e28795 100644 --- a/plugin/plugin_test.go +++ b/plugin/plugin_test.go @@ -10,9 +10,6 @@ import ( "github.com/rs/zerolog" ) -type testPluginSpec struct { -} - type testPluginClient struct { messages []message.Message } diff --git a/plugin/validate.go b/plugin/validate.go deleted file mode 100644 index 6f557e9c1f..0000000000 --- a/plugin/validate.go +++ /dev/null @@ -1,27 +0,0 @@ -package plugin - -import ( - "fmt" - - "github.com/cloudquery/plugin-sdk/v4/schema" -) - -func (p *Plugin) validate(tables schema.Tables) error { - if err := tables.ValidateDuplicateColumns(); err != nil { - return fmt.Errorf("found duplicate columns in source plugin: %s: %w", p.name, err) - } - - if err := tables.ValidateDuplicateTables(); err != nil { - return fmt.Errorf("found duplicate tables in source plugin: %s: %w", p.name, err) - } - - if err := tables.ValidateTableNames(); err != nil { - return fmt.Errorf("found table with invalid name in source plugin: %s: %w", p.name, err) - } - - if err := tables.ValidateColumnNames(); err != nil { - return fmt.Errorf("found column with invalid name in source plugin: %s: %w", p.name, err) - } - - return nil -} diff --git a/serve/plugin.go b/serve/plugin.go index 94466733db..c787399820 100644 --- a/serve/plugin.go +++ b/serve/plugin.go @@ -95,7 +95,11 @@ func (s *PluginServe) Serve(ctx context.Context) error { if err := types.RegisterAllExtensions(); err != nil { return err } - defer types.UnregisterAllExtensions() + defer func() { + if err := types.UnregisterAllExtensions(); err != nil { + log.Error().Err(err).Msg("failed to unregister all extensions") + } + }() cmd := s.newCmdPluginRoot() if s.args != nil { cmd.SetArgs(s.args) From 97110aa52a06665eb6c7bc2162a8b67b9b16f7bd Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Fri, 23 Jun 2023 19:06:48 +0300 Subject: [PATCH 107/125] lint --- plugin/plugin.go | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/plugin/plugin.go b/plugin/plugin.go index 81fe39dcfd..a5a53e9066 100644 --- a/plugin/plugin.go +++ b/plugin/plugin.go @@ -62,24 +62,6 @@ type Plugin struct { internalColumns bool } -const ( - maxAllowedDepth = 4 -) - -func maxDepth(tables schema.Tables) uint64 { - var depth uint64 - if len(tables) == 0 { - return 0 - } - for _, table := range tables { - newDepth := 1 + maxDepth(table.Relations) - if newDepth > depth { - depth = newDepth - } - } - return depth -} - // NewPlugin returns a new CloudQuery Plugin with the given name, version and implementation. // Depending on the options, it can be a write-only plugin, read-only plugin, or both. func NewPlugin(name string, version string, newClient NewClientFunc, options ...Option) *Plugin { From 1df923dc76bb149c3bb21c4cd487ee99ad097765 Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Fri, 23 Jun 2023 19:56:25 +0300 Subject: [PATCH 108/125] tests passing --- serve/destination_v0_test.go | 5 ----- 1 file changed, 5 deletions(-) diff --git a/serve/destination_v0_test.go b/serve/destination_v0_test.go index 96b7565c82..7f3c9fe21a 100644 --- a/serve/destination_v0_test.go +++ b/serve/destination_v0_test.go @@ -158,11 +158,6 @@ func TestDestination(t *testing.T) { t.Fatal(err) } - _, err = c.GetMetrics(ctx, &pb.GetDestinationMetrics_Request{}) - if err != nil { - t.Fatal(err) - } - if _, err := c.Close(ctx, &pb.Close_Request{}); err != nil { t.Fatalf("failed to call Close: %v", err) } From b0cfbbf9b875f4dabba8a821d5887dac8ac529f2 Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Fri, 23 Jun 2023 19:58:04 +0300 Subject: [PATCH 109/125] lints --- message/message.go | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/message/message.go b/message/message.go index dd41a2e171..8377cc7777 100644 --- a/message/message.go +++ b/message/message.go @@ -54,8 +54,7 @@ type Inserts []*Insert func (messages Messages) InsertItems() int64 { items := int64(0) for _, msg := range messages { - switch m := msg.(type) { - case *Insert: + if m, ok := msg.(*Insert); ok { items += m.Record.NumRows() } } @@ -65,8 +64,7 @@ func (messages Messages) InsertItems() int64 { func (messages Messages) InsertMessage() Inserts { inserts := []*Insert{} for _, msg := range messages { - switch m := msg.(type) { - case *Insert: + if m, ok := msg.(*Insert); ok { inserts = append(inserts, m) } } From 45b18fd8fdda02fd7d55f81060324c97f29a7139 Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Fri, 23 Jun 2023 20:43:09 +0300 Subject: [PATCH 110/125] more lint stuff --- internal/memdb/memdb.go | 8 ++++---- internal/servers/destination/v1/destinations.go | 11 ++--------- internal/servers/plugin/v3/plugin.go | 4 ++-- plugin/plugin_destination.go | 5 +---- plugin/plugin_test.go | 8 ++++---- scheduler/scheduler.go | 6 ++++-- writers/batch.go | 7 ++++--- writers/batch_test.go | 1 - writers/mixed_batch.go | 2 -- 9 files changed, 21 insertions(+), 31 deletions(-) diff --git a/internal/memdb/memdb.go b/internal/memdb/memdb.go index 571387bb49..514dd9a7e5 100644 --- a/internal/memdb/memdb.go +++ b/internal/memdb/memdb.go @@ -87,11 +87,11 @@ func (c *client) overwrite(table *schema.Table, data arrow.Record) { c.memoryDB[tableName] = append(c.memoryDB[tableName], data) } -func (c *client) ID() string { +func (*client) ID() string { return "testDestinationMemDB" } -func (c *client) GetSpec() any { +func (*client) GetSpec() any { return &Spec{} } @@ -123,7 +123,7 @@ func (c *client) Sync(ctx context.Context, options plugin.SyncOptions, res chan< return nil } -func (c *client) Tables(ctx context.Context) (schema.Tables, error) { +func (c *client) Tables(_ context.Context) (schema.Tables, error) { tables := make(schema.Tables, 0, len(c.tables)) for _, table := range c.tables { tables = append(tables, table) @@ -149,7 +149,7 @@ func (c *client) migrate(_ context.Context, table *schema.Table) { c.tables[tableName] = table } -func (c *client) Write(ctx context.Context, options plugin.WriteOptions, msgs <-chan message.Message) error { +func (c *client) Write(ctx context.Context, _ plugin.WriteOptions, msgs <-chan message.Message) error { if c.errOnWrite { return fmt.Errorf("errOnWrite") } diff --git a/internal/servers/destination/v1/destinations.go b/internal/servers/destination/v1/destinations.go index 1b0e4d04c0..fc9b688800 100644 --- a/internal/servers/destination/v1/destinations.go +++ b/internal/servers/destination/v1/destinations.go @@ -186,15 +186,8 @@ func setCQIDAsPrimaryKeysForTables(tables schema.Tables) { } } -func (s *Server) GetMetrics(context.Context, *pb.GetDestinationMetrics_Request) (*pb.GetDestinationMetrics_Response, error) { - // stats := s.Plugin.Metrics() - // b, err := json.Marshal(stats) - // if err != nil { - // return nil, fmt.Errorf("failed to marshal stats: %w", err) - // } - return &pb.GetDestinationMetrics_Response{ - // Metrics: b, - }, nil +func (*Server) GetMetrics(context.Context, *pb.GetDestinationMetrics_Request) (*pb.GetDestinationMetrics_Response, error) { + return nil, status.Errorf(codes.Unimplemented, "method GetMetrics is deprecated. please upgrade CLI") } func (s *Server) DeleteStale(ctx context.Context, req *pb.DeleteStale_Request) (*pb.DeleteStale_Response, error) { diff --git a/internal/servers/plugin/v3/plugin.go b/internal/servers/plugin/v3/plugin.go index 874dc6507d..aee9f90b50 100644 --- a/internal/servers/plugin/v3/plugin.go +++ b/internal/servers/plugin/v3/plugin.go @@ -38,8 +38,8 @@ func (s *Server) GetTables(ctx context.Context, _ *pb.GetTables_Request) (*pb.Ge } schemas := tables.ToArrowSchemas() encoded := make([][]byte, len(schemas)) - for i, schema := range schemas { - encoded[i], err = pb.SchemaToBytes(schema) + for i, sc := range schemas { + encoded[i], err = pb.SchemaToBytes(sc) if err != nil { return nil, status.Errorf(codes.Internal, "failed to encode tables: %v", err) } diff --git a/plugin/plugin_destination.go b/plugin/plugin_destination.go index 68890bbd40..900e040b02 100644 --- a/plugin/plugin_destination.go +++ b/plugin/plugin_destination.go @@ -35,8 +35,5 @@ func (p *Plugin) WriteAll(ctx context.Context, options WriteOptions, resources [ } func (p *Plugin) Write(ctx context.Context, options WriteOptions, res <-chan message.Message) error { - if err := p.client.Write(ctx, options, res); err != nil { - return err - } - return nil + return p.client.Write(ctx, options, res) } diff --git a/plugin/plugin_test.go b/plugin/plugin_test.go index 3290e28795..dca7387f9a 100644 --- a/plugin/plugin_test.go +++ b/plugin/plugin_test.go @@ -18,15 +18,15 @@ func newTestPluginClient(context.Context, zerolog.Logger, []byte) (Client, error return &testPluginClient{}, nil } -func (c *testPluginClient) GetSpec() any { +func (*testPluginClient) GetSpec() any { return &struct{}{} } -func (c *testPluginClient) Tables(ctx context.Context) (schema.Tables, error) { +func (*testPluginClient) Tables(ctx context.Context) (schema.Tables, error) { return schema.Tables{}, nil } -func (c *testPluginClient) Read(ctx context.Context, table *schema.Table, res chan<- arrow.Record) error { +func (*testPluginClient) Read(ctx context.Context, table *schema.Table, res chan<- arrow.Record) error { return nil } @@ -42,7 +42,7 @@ func (c *testPluginClient) Write(ctx context.Context, options WriteOptions, res } return nil } -func (c *testPluginClient) Close(context.Context) error { +func (*testPluginClient) Close(context.Context) error { return nil } diff --git a/scheduler/scheduler.go b/scheduler/scheduler.go index be5a7a1e71..89ea17ed14 100644 --- a/scheduler/scheduler.go +++ b/scheduler/scheduler.go @@ -130,15 +130,17 @@ func NewScheduler(client schema.ClientMeta, opts ...Option) *Scheduler { // in the real world. Should use Sync for production. func (s *Scheduler) SyncAll(ctx context.Context, tables schema.Tables) (message.Messages, error) { res := make(chan message.Message) + var err error go func() { defer close(res) - s.Sync(ctx, tables, res) + err = s.Sync(ctx, tables, res) }() + // nolint:prealloc var messages []message.Message for msg := range res { messages = append(messages, msg) } - return messages, nil + return messages, err } func (s *Scheduler) Sync(ctx context.Context, tables schema.Tables, res chan<- message.Message) error { diff --git a/writers/batch.go b/writers/batch.go index 75ed13ed37..a060f91d75 100644 --- a/writers/batch.go +++ b/writers/batch.go @@ -105,9 +105,10 @@ func (w *BatchWriter) Flush(ctx context.Context) error { <-done } w.workersLock.RUnlock() - w.flushMigrateTables(ctx) - w.flushDeleteStaleTables(ctx) - return nil + if err := w.flushMigrateTables(ctx); err != nil { + return err + } + return w.flushDeleteStaleTables(ctx) } func (w *BatchWriter) Close(ctx context.Context) error { diff --git a/writers/batch_test.go b/writers/batch_test.go index 0dd43a974f..a6940181d1 100644 --- a/writers/batch_test.go +++ b/writers/batch_test.go @@ -120,7 +120,6 @@ func TestBatchFlushDifferentMessages(t *testing.T) { if testClient.InsertsLen() != 1 { t.Fatalf("expected 1 insert messages, got %d", testClient.InsertsLen()) } - } func TestBatchSize(t *testing.T) { diff --git a/writers/mixed_batch.go b/writers/mixed_batch.go index 1ae699a654..f6f3337840 100644 --- a/writers/mixed_batch.go +++ b/writers/mixed_batch.go @@ -8,7 +8,6 @@ import ( "github.com/apache/arrow/go/v13/arrow/util" "github.com/cloudquery/plugin-sdk/v4/message" "github.com/cloudquery/plugin-sdk/v4/plugin" - "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/rs/zerolog" ) @@ -28,7 +27,6 @@ type MixedBatchClient interface { } type MixedBatchWriter struct { - tables schema.Tables client MixedBatchClient logger zerolog.Logger batchTimeout time.Duration From 0d2fcf93523c9ffda1fb34cd9e3fbb9636cf0868 Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Fri, 23 Jun 2023 21:26:31 +0300 Subject: [PATCH 111/125] add lint --- internal/memdb/memdb.go | 4 +- internal/servers/plugin/v3/plugin.go | 36 ++--------------- internal/servers/plugin/v3/state.go | 4 +- plugin/plugin_test.go | 4 +- writers/batch.go | 60 ++++++++++++++-------------- writers/mixed_batch.go | 2 - 6 files changed, 39 insertions(+), 71 deletions(-) diff --git a/internal/memdb/memdb.go b/internal/memdb/memdb.go index 514dd9a7e5..b8c6ddf4bc 100644 --- a/internal/memdb/memdb.go +++ b/internal/memdb/memdb.go @@ -95,7 +95,7 @@ func (*client) GetSpec() any { return &Spec{} } -func (c *client) Read(ctx context.Context, table *schema.Table, res chan<- arrow.Record) error { +func (c *client) Read(_ context.Context, table *schema.Table, res chan<- arrow.Record) error { c.memoryDBLock.RLock() defer c.memoryDBLock.RUnlock() @@ -106,7 +106,7 @@ func (c *client) Read(ctx context.Context, table *schema.Table, res chan<- arrow return nil } -func (c *client) Sync(ctx context.Context, options plugin.SyncOptions, res chan<- message.Message) error { +func (c *client) Sync(_ context.Context, options plugin.SyncOptions, res chan<- message.Message) error { c.memoryDBLock.RLock() for tableName := range c.memoryDB { diff --git a/internal/servers/plugin/v3/plugin.go b/internal/servers/plugin/v3/plugin.go index aee9f90b50..0d8a2d809d 100644 --- a/internal/servers/plugin/v3/plugin.go +++ b/internal/servers/plugin/v3/plugin.go @@ -2,17 +2,14 @@ package plugin import ( "context" - "errors" "fmt" "io" - "github.com/apache/arrow/go/v13/arrow" "github.com/cloudquery/plugin-pb-go/managedplugin" pb "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" "github.com/cloudquery/plugin-sdk/v4/message" "github.com/cloudquery/plugin-sdk/v4/plugin" "github.com/cloudquery/plugin-sdk/v4/schema" - "github.com/getsentry/sentry-go" "github.com/rs/zerolog" "golang.org/x/sync/errgroup" "google.golang.org/grpc/codes" @@ -150,17 +147,10 @@ func (s *Server) Sync(req *pb.Sync_Request, stream pb.Plugin_SyncServer) error { return status.Errorf(codes.Internal, "unknown message type: %T", msg) } - // err := checkMessageSize(msg, rec) - // if err != nil { - // sc := rec.Schema() - // tName, _ := sc.Metadata().GetValue(schema.MetadataTableName) - // s.Logger.Warn().Str("table", tName). - // Int("bytes", len(msg.String())). - // Msg("Row exceeding max bytes ignored") - // continue - // } - if err := stream.Send(pbMsg); err != nil { - return status.Errorf(codes.Internal, "failed to send resource: %v", err) + size := proto.Size(pbMsg) + if size > MaxMsgSize { + s.Logger.Error().Int("bytes", size).Msg("Message exceeds max size") + continue } } @@ -264,24 +254,6 @@ func (s *Server) Write(msg pb.Plugin_WriteServer) error { } } -func checkMessageSize(msg proto.Message, record arrow.Record) error { - size := proto.Size(msg) - // log error to Sentry if row exceeds half of the max size - if size > MaxMsgSize/2 { - sc := record.Schema() - tName, _ := sc.Metadata().GetValue(schema.MetadataTableName) - sentry.WithScope(func(scope *sentry.Scope) { - scope.SetTag("table", tName) - scope.SetExtra("bytes", size) - sentry.CurrentHub().CaptureMessage("Large message detected") - }) - } - if size > MaxMsgSize { - return errors.New("message exceeds max size") - } - return nil -} - func (s *Server) Close(ctx context.Context, _ *pb.Close_Request) (*pb.Close_Response, error) { return &pb.Close_Response{}, s.Plugin.Close(ctx) } diff --git a/internal/servers/plugin/v3/state.go b/internal/servers/plugin/v3/state.go index 146e646248..ded1d397c6 100644 --- a/internal/servers/plugin/v3/state.go +++ b/internal/servers/plugin/v3/state.go @@ -126,12 +126,12 @@ func newStateClient(ctx context.Context, conn *grpc.ClientConn, spec *pbPlugin.S return c, nil } -func (c *ClientV3) SetKey(ctx context.Context, key string, value string) error { +func (c *ClientV3) SetKey(_ context.Context, key string, value string) error { c.mem[key] = value return nil } -func (c *ClientV3) flush(ctx context.Context) error { +func (c *ClientV3) Flush(ctx context.Context) error { bldr := array.NewRecordBuilder(memory.DefaultAllocator, nil) for k, v := range c.mem { bldr.Field(0).(*array.StringBuilder).Append(k) diff --git a/plugin/plugin_test.go b/plugin/plugin_test.go index dca7387f9a..c6fb0bb1c7 100644 --- a/plugin/plugin_test.go +++ b/plugin/plugin_test.go @@ -22,11 +22,11 @@ func (*testPluginClient) GetSpec() any { return &struct{}{} } -func (*testPluginClient) Tables(ctx context.Context) (schema.Tables, error) { +func (*testPluginClient) Tables(context.Context) (schema.Tables, error) { return schema.Tables{}, nil } -func (*testPluginClient) Read(ctx context.Context, table *schema.Table, res chan<- arrow.Record) error { +func (*testPluginClient) Read(context.Context, *schema.Table, chan<- arrow.Record) error { return nil } diff --git a/writers/batch.go b/writers/batch.go index a060f91d75..58b1fc58a6 100644 --- a/writers/batch.go +++ b/writers/batch.go @@ -6,9 +6,7 @@ import ( "sync" "time" - "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/util" - "github.com/cloudquery/plugin-sdk/v4/internal/pk" "github.com/cloudquery/plugin-sdk/v4/message" "github.com/cloudquery/plugin-sdk/v4/plugin" "github.com/cloudquery/plugin-sdk/v4/schema" @@ -130,7 +128,7 @@ func (w *BatchWriter) worker(ctx context.Context, tableName string, ch <-chan *m case r, ok := <-ch: if !ok { if len(resources) > 0 { - w.flush(ctx, tableName, resources) + w.flushTable(ctx, tableName, resources) } return } @@ -138,19 +136,19 @@ func (w *BatchWriter) worker(ctx context.Context, tableName string, ch <-chan *m sizeBytes += util.TotalRecordSize(r.Record) if len(resources) >= w.batchSize || sizeBytes+util.TotalRecordSize(r.Record) >= int64(w.batchSizeBytes) { - w.flush(ctx, tableName, resources) + w.flushTable(ctx, tableName, resources) resources = make([]*message.Insert, 0) sizeBytes = 0 } case <-time.After(w.batchTimeout): if len(resources) > 0 { - w.flush(ctx, tableName, resources) + w.flushTable(ctx, tableName, resources) resources = make([]*message.Insert, 0) sizeBytes = 0 } case done := <-flush: if len(resources) > 0 { - w.flush(ctx, tableName, resources) + w.flushTable(ctx, tableName, resources) resources = make([]*message.Insert, 0) sizeBytes = 0 } @@ -162,7 +160,7 @@ func (w *BatchWriter) worker(ctx context.Context, tableName string, ch <-chan *m } } -func (w *BatchWriter) flush(ctx context.Context, tableName string, resources []*message.Insert) { +func (w *BatchWriter) flushTable(ctx context.Context, tableName string, resources []*message.Insert) { // resources = w.removeDuplicatesByPK(table, resources) start := time.Now() batchSize := len(resources) @@ -173,32 +171,32 @@ func (w *BatchWriter) flush(ctx context.Context, tableName string, resources []* } } -func (*BatchWriter) removeDuplicatesByPK(table *schema.Table, resources []arrow.Record) []arrow.Record { - pkIndices := table.PrimaryKeysIndexes() - // special case where there's no PK at all - if len(pkIndices) == 0 { - return resources - } +// func (*BatchWriter) removeDuplicatesByPK(table *schema.Table, resources []*message.Insert) []*message.Insert { +// pkIndices := table.PrimaryKeysIndexes() +// // special case where there's no PK at all +// if len(pkIndices) == 0 { +// return resources +// } - pks := make(map[string]struct{}, len(resources)) - res := make([]arrow.Record, 0, len(resources)) - for _, r := range resources { - if r.NumRows() > 1 { - panic(fmt.Sprintf("record with more than 1 row: %d", r.NumRows())) - } - key := pk.String(r) - _, ok := pks[key] - if !ok { - pks[key] = struct{}{} - res = append(res, r) - continue - } - // duplicate, release - r.Release() - } +// pks := make(map[string]struct{}, len(resources)) +// res := make([]*message.Insert, 0, len(resources)) +// for _, r := range resources { +// if r.Record.NumRows() > 1 { +// panic(fmt.Sprintf("record with more than 1 row: %d", r.Record.NumRows())) +// } +// key := pk.String(r.Record) +// _, ok := pks[key] +// if !ok { +// pks[key] = struct{}{} +// res = append(res, r) +// continue +// } +// // duplicate, release +// r.Release() +// } - return res -} +// return res +// } func (w *BatchWriter) flushMigrateTables(ctx context.Context) error { w.migrateTableLock.Lock() diff --git a/writers/mixed_batch.go b/writers/mixed_batch.go index f6f3337840..ec9a3e48e3 100644 --- a/writers/mixed_batch.go +++ b/writers/mixed_batch.go @@ -17,8 +17,6 @@ const ( msgTypeDeleteStale ) -var allMsgTypes = []int{msgTypeMigrateTable, msgTypeInsert, msgTypeDeleteStale} - // MixedBatchClient is a client that will receive batches of messages with a mixture of tables. type MixedBatchClient interface { MigrateTableBatch(ctx context.Context, messages []*message.MigrateTable, options plugin.WriteOptions) error From 550f3151600c90edb1edc44723a8d92c42984b40 Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Fri, 23 Jun 2023 21:57:30 +0300 Subject: [PATCH 112/125] lint --- internal/servers/plugin/v3/state.go | 2 +- plugin/plugin.go | 8 ++++---- plugin/plugin_test.go | 4 ++-- writers/batch.go | 8 ++++---- writers/mixed_batch_test.go | 6 +++--- 5 files changed, 14 insertions(+), 14 deletions(-) diff --git a/internal/servers/plugin/v3/state.go b/internal/servers/plugin/v3/state.go index ded1d397c6..85e118570e 100644 --- a/internal/servers/plugin/v3/state.go +++ b/internal/servers/plugin/v3/state.go @@ -165,7 +165,7 @@ func (c *ClientV3) Flush(ctx context.Context) error { return nil } -func (c *ClientV3) GetKey(ctx context.Context, key string) (string, error) { +func (c *ClientV3) GetKey(_ context.Context, key string) (string, error) { if val, ok := c.mem[key]; ok { return val, nil } diff --git a/plugin/plugin.go b/plugin/plugin.go index a5a53e9066..8bc04e516e 100644 --- a/plugin/plugin.go +++ b/plugin/plugin.go @@ -22,21 +22,21 @@ type Client interface { type UnimplementedDestination struct{} -func (UnimplementedDestination) Write(ctx context.Context, options WriteOptions, res <-chan message.Message) error { +func (UnimplementedDestination) Write(context.Context, WriteOptions, <-chan message.Message) error { return ErrNotImplemented } -func (UnimplementedDestination) Read(ctx context.Context, table *schema.Table, res chan<- arrow.Record) error { +func (UnimplementedDestination) Read(context.Context, *schema.Table, chan<- arrow.Record) error { return fmt.Errorf("not implemented") } type UnimplementedSource struct{} -func (UnimplementedSource) Sync(ctx context.Context, options SyncOptions, res chan<- message.Message) error { +func (UnimplementedSource) Sync(context.Context, SyncOptions, chan<- message.Message) error { return ErrNotImplemented } -func (UnimplementedSource) Tables(ctx context.Context) (schema.Tables, error) { +func (UnimplementedSource) Tables(context.Context) (schema.Tables, error) { return nil, ErrNotImplemented } diff --git a/plugin/plugin_test.go b/plugin/plugin_test.go index c6fb0bb1c7..57c3b8ebf9 100644 --- a/plugin/plugin_test.go +++ b/plugin/plugin_test.go @@ -30,13 +30,13 @@ func (*testPluginClient) Read(context.Context, *schema.Table, chan<- arrow.Recor return nil } -func (c *testPluginClient) Sync(ctx context.Context, options SyncOptions, res chan<- message.Message) error { +func (c *testPluginClient) Sync(_ context.Context, _ SyncOptions, res chan<- message.Message) error { for _, msg := range c.messages { res <- msg } return nil } -func (c *testPluginClient) Write(ctx context.Context, options WriteOptions, res <-chan message.Message) error { +func (c *testPluginClient) Write(_ context.Context, _ WriteOptions, res <-chan message.Message) error { for msg := range res { c.messages = append(c.messages, msg) } diff --git a/writers/batch.go b/writers/batch.go index 58b1fc58a6..1b5c3b57c3 100644 --- a/writers/batch.go +++ b/writers/batch.go @@ -109,7 +109,7 @@ func (w *BatchWriter) Flush(ctx context.Context) error { return w.flushDeleteStaleTables(ctx) } -func (w *BatchWriter) Close(ctx context.Context) error { +func (w *BatchWriter) Close(context.Context) error { w.workersLock.Lock() defer w.workersLock.Unlock() for _, w := range w.workers { @@ -224,7 +224,7 @@ func (w *BatchWriter) flushDeleteStaleTables(ctx context.Context) error { return nil } -func (w *BatchWriter) flushInsert(ctx context.Context, tableName string) { +func (w *BatchWriter) flushInsert(tableName string) { w.workersLock.RLock() worker, ok := w.workers[tableName] if !ok { @@ -254,7 +254,7 @@ func (w *BatchWriter) Write(ctx context.Context, msgs <-chan message.Message) er if err := w.flushMigrateTables(ctx); err != nil { return err } - w.flushInsert(ctx, m.Table.Name) + w.flushInsert(m.Table.Name) w.deleteStaleLock.Lock() w.deleteStaleMessages = append(w.deleteStaleMessages, m) l := len(w.deleteStaleMessages) @@ -275,7 +275,7 @@ func (w *BatchWriter) Write(ctx context.Context, msgs <-chan message.Message) er return err } case *message.MigrateTable: - w.flushInsert(ctx, m.Table.Name) + w.flushInsert(m.Table.Name) if err := w.flushDeleteStaleTables(ctx); err != nil { return err } diff --git a/writers/mixed_batch_test.go b/writers/mixed_batch_test.go index 6f5a23245c..ee8c9bbc94 100644 --- a/writers/mixed_batch_test.go +++ b/writers/mixed_batch_test.go @@ -17,7 +17,7 @@ type testMixedBatchClient struct { receivedBatches [][]message.Message } -func (c *testMixedBatchClient) MigrateTableBatch(ctx context.Context, msgs []*message.MigrateTable, options plugin.WriteOptions) error { +func (c *testMixedBatchClient) MigrateTableBatch(_ context.Context, msgs []*message.MigrateTable, _ plugin.WriteOptions) error { m := make([]message.Message, len(msgs)) for i, msg := range msgs { m[i] = msg @@ -26,7 +26,7 @@ func (c *testMixedBatchClient) MigrateTableBatch(ctx context.Context, msgs []*me return nil } -func (c *testMixedBatchClient) InsertBatch(ctx context.Context, msgs []*message.Insert, options plugin.WriteOptions) error { +func (c *testMixedBatchClient) InsertBatch(_ context.Context, msgs []*message.Insert, _ plugin.WriteOptions) error { m := make([]message.Message, len(msgs)) for i, msg := range msgs { m[i] = msg @@ -35,7 +35,7 @@ func (c *testMixedBatchClient) InsertBatch(ctx context.Context, msgs []*message. return nil } -func (c *testMixedBatchClient) DeleteStaleBatch(ctx context.Context, msgs []*message.DeleteStale, options plugin.WriteOptions) error { +func (c *testMixedBatchClient) DeleteStaleBatch(_ context.Context, msgs []*message.DeleteStale, _ plugin.WriteOptions) error { m := make([]message.Message, len(msgs)) for i, msg := range msgs { m[i] = msg From e88cb9d9c6f2dca3d7ff4f837fbf21f7589df175 Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Fri, 23 Jun 2023 22:28:10 +0300 Subject: [PATCH 113/125] fix lint --- internal/memdb/memdb_test.go | 2 +- plugin/diff.go | 4 -- plugin/nulls.go | 72 --------------------------------- plugin/testing_write.go | 19 +++++---- plugin/testing_write_migrate.go | 2 + serve/destination_v1_test.go | 5 --- 6 files changed, 12 insertions(+), 92 deletions(-) delete mode 100644 plugin/nulls.go diff --git a/internal/memdb/memdb_test.go b/internal/memdb/memdb_test.go index fe240fe58c..e0809b05cd 100644 --- a/internal/memdb/memdb_test.go +++ b/internal/memdb/memdb_test.go @@ -16,7 +16,7 @@ func TestPlugin(t *testing.T) { plugin.TestWriterSuiteRunner( t, p, - plugin.PluginTestSuiteTests{ + plugin.WriterTestSuiteTests{ SafeMigrations: plugin.SafeMigrations{}, }, ) diff --git a/plugin/diff.go b/plugin/diff.go index 343de8d19b..a5e532a9fe 100644 --- a/plugin/diff.go +++ b/plugin/diff.go @@ -31,7 +31,3 @@ func RecordDiff(l, r arrow.Record) string { } return sb.String() } - -func recordApproxEqual(l, r arrow.Record) bool { - return array.RecordApproxEqual(l, r, array.WithUnorderedMapKeys(true)) -} diff --git a/plugin/nulls.go b/plugin/nulls.go deleted file mode 100644 index e60a88a24f..0000000000 --- a/plugin/nulls.go +++ /dev/null @@ -1,72 +0,0 @@ -package plugin - -import ( - "github.com/apache/arrow/go/v13/arrow" - "github.com/apache/arrow/go/v13/arrow/array" - "github.com/apache/arrow/go/v13/arrow/memory" -) - -func stripNullsFromLists(records []arrow.Record) { - for i := range records { - cols := records[i].Columns() - for c, col := range cols { - if col.DataType().ID() != arrow.LIST { - continue - } - - list := col.(*array.List) - bldr := array.NewListBuilder(memory.DefaultAllocator, list.DataType().(*arrow.ListType).Elem()) - for j := 0; j < list.Len(); j++ { - if list.IsNull(j) { - bldr.AppendNull() - continue - } - bldr.Append(true) - vBldr := bldr.ValueBuilder() - from, to := list.ValueOffsets(j) - slc := array.NewSlice(list.ListValues(), from, to) - for k := 0; k < int(to-from); k++ { - if slc.IsNull(k) { - continue - } - err := vBldr.AppendValueFromString(slc.ValueStr(k)) - if err != nil { - panic(err) - } - } - } - cols[c] = bldr.NewArray() - } - records[i] = array.NewRecord(records[i].Schema(), cols, records[i].NumRows()) - } -} - -type AllowNullFunc func(arrow.DataType) bool - -func (f AllowNullFunc) replaceNullsByEmpty(records []arrow.Record) { - if f == nil { - return - } - for i := range records { - cols := records[i].Columns() - for c, col := range records[i].Columns() { - if col.NullN() == 0 || f(col.DataType()) { - continue - } - - builder := array.NewBuilder(memory.DefaultAllocator, records[i].Column(c).DataType()) - for j := 0; j < col.Len(); j++ { - if col.IsNull(j) { - builder.AppendEmptyValue() - continue - } - - if err := builder.AppendValueFromString(col.ValueStr(j)); err != nil { - panic(err) - } - } - cols[c] = builder.NewArray() - } - records[i] = array.NewRecord(records[i].Schema(), cols, records[i].NumRows()) - } -} diff --git a/plugin/testing_write.go b/plugin/testing_write.go index e3a6eddc39..4f198e80a8 100644 --- a/plugin/testing_write.go +++ b/plugin/testing_write.go @@ -4,12 +4,11 @@ import ( "context" "testing" - "github.com/apache/arrow/go/v13/arrow" "github.com/cloudquery/plugin-sdk/v4/schema" ) type WriterTestSuite struct { - tests PluginTestSuiteTests + tests WriterTestSuiteTests plugin *Plugin @@ -17,7 +16,7 @@ type WriterTestSuite struct { // Destinations that have problems representing some data types should provide a custom implementation here. // If this param is empty, the default is to allow all data types to be nullable. // When the value returned by this func is `true` the comparison is made with the empty value instead of null. - allowNull AllowNullFunc + // allowNull AllowNullFunc // IgnoreNullsInLists allows stripping null values from lists before comparison. // Destination setups that don't support nulls in lists should set this to true. @@ -36,7 +35,7 @@ type SafeMigrations struct { ChangeColumn bool } -type PluginTestSuiteTests struct { +type WriterTestSuiteTests struct { // SkipUpsert skips testing with message.Insert and Upsert=true. // Usually when a destination is not supporting primary keys SkipUpsert bool @@ -57,11 +56,11 @@ type PluginTestSuiteTests struct { type NewPluginFunc func() *Plugin -func WithTestSourceAllowNull(allowNull func(arrow.DataType) bool) func(o *WriterTestSuite) { - return func(o *WriterTestSuite) { - o.allowNull = allowNull - } -} +// func WithTestSourceAllowNull(allowNull func(arrow.DataType) bool) func(o *WriterTestSuite) { +// return func(o *WriterTestSuite) { +// o.allowNull = allowNull +// } +// } func WithTestIgnoreNullsInLists() func(o *WriterTestSuite) { return func(o *WriterTestSuite) { @@ -75,7 +74,7 @@ func WithTestDataOptions(opts schema.TestSourceOptions) func(o *WriterTestSuite) } } -func TestWriterSuiteRunner(t *testing.T, p *Plugin, tests PluginTestSuiteTests, opts ...func(o *WriterTestSuite)) { +func TestWriterSuiteRunner(t *testing.T, p *Plugin, tests WriterTestSuiteTests, opts ...func(o *WriterTestSuite)) { suite := &WriterTestSuite{ tests: tests, plugin: p, diff --git a/plugin/testing_write_migrate.go b/plugin/testing_write_migrate.go index 7418f2b043..af6224cbe2 100644 --- a/plugin/testing_write_migrate.go +++ b/plugin/testing_write_migrate.go @@ -18,6 +18,7 @@ func tableUUIDSuffix() string { return strings.ReplaceAll(uuid.NewString(), "-", "_")[:8] // use only first 8 chars } +// nolint:revive func (s *WriterTestSuite) migrate(ctx context.Context, target *schema.Table, source *schema.Table, supportsSafeMigrate bool, writeOptionMigrateForce bool) error { if err := s.plugin.writeOne(ctx, WriteOptions{ MigrateForce: writeOptionMigrateForce, @@ -86,6 +87,7 @@ func (s *WriterTestSuite) migrate(ctx context.Context, target *schema.Table, sou return nil } +// nolint:revive func (s *WriterTestSuite) testMigrate( ctx context.Context, t *testing.T, diff --git a/serve/destination_v1_test.go b/serve/destination_v1_test.go index e4caa91734..c13b56232c 100644 --- a/serve/destination_v1_test.go +++ b/serve/destination_v1_test.go @@ -159,11 +159,6 @@ func TestDestinationV1(t *testing.T) { t.Fatal(err) } - _, err = c.GetMetrics(ctx, &pb.GetDestinationMetrics_Request{}) - if err != nil { - t.Fatal(err) - } - if _, err := c.Close(ctx, &pb.Close_Request{}); err != nil { t.Fatalf("failed to call Close: %v", err) } From 1a744ce887b010f40f4940d39fec34c104fe7dce Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Fri, 23 Jun 2023 22:34:21 +0300 Subject: [PATCH 114/125] merge with main --- go.sum | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/go.sum b/go.sum index ac3cbf3eab..ed14b152b7 100644 --- a/go.sum +++ b/go.sum @@ -47,8 +47,8 @@ github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWR github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= -github.com/cloudquery/arrow/go/v13 v13.0.0-20230623001532-8366a2241e66 h1:8eQrRKCk6OwCiIW43+Y10p2nkTdTATu5kqXEA7iBlg8= -github.com/cloudquery/arrow/go/v13 v13.0.0-20230623001532-8366a2241e66/go.mod h1:W69eByFNO0ZR30q1/7Sr9d83zcVZmF2MiP3fFYAWJOc= +github.com/cloudquery/arrow/go/v13 v13.0.0-20230610001216-0f7bd3beda2c h1:nQSB4v0QxCW5XDLvVBcaNrsJ+J/esMBoFYjymllxM1E= +github.com/cloudquery/arrow/go/v13 v13.0.0-20230610001216-0f7bd3beda2c/go.mod h1:W69eByFNO0ZR30q1/7Sr9d83zcVZmF2MiP3fFYAWJOc= github.com/cloudquery/plugin-pb-go v1.3.2 h1:q/REJeRr5zyyNUZMcvE43+X7hV+zjzWqLFlWWOnWbvs= github.com/cloudquery/plugin-pb-go v1.3.2/go.mod h1:NbWAtT2BzJQ9+XUWwh3IKBg3MOeV9ZEpHoHNAQ/YDV8= github.com/cloudquery/plugin-sdk/v2 v2.7.0 h1:hRXsdEiaOxJtsn/wZMFQC9/jPfU1MeMK3KF+gPGqm7U= From 580736813c95fd22d9e9deb9e4f3e0329dfaa55b Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Sat, 24 Jun 2023 09:49:04 +0300 Subject: [PATCH 115/125] fix tests --- internal/memdb/memdb.go | 2 +- internal/memdb/memdb_test.go | 61 +++++++---------------- internal/servers/plugin/v3/plugin.go | 3 ++ internal/servers/plugin/v3/plugin_test.go | 52 +++++++++++++++++++ 4 files changed, 75 insertions(+), 43 deletions(-) create mode 100644 internal/servers/plugin/v3/plugin_test.go diff --git a/internal/memdb/memdb.go b/internal/memdb/memdb.go index b8c6ddf4bc..2f7ce4fb5e 100644 --- a/internal/memdb/memdb.go +++ b/internal/memdb/memdb.go @@ -57,7 +57,7 @@ func NewMemDBClient(ctx context.Context, l zerolog.Logger, spec []byte) (plugin. return GetNewClient()(ctx, l, spec) } -func NewMemDBClientErrOnNew(context.Context, zerolog.Logger, any) (plugin.Client, error) { +func NewMemDBClientErrOnNew(context.Context, zerolog.Logger, []byte) (plugin.Client, error) { return nil, fmt.Errorf("newTestDestinationMemDBClientErrOnNew") } diff --git a/internal/memdb/memdb_test.go b/internal/memdb/memdb_test.go index e0809b05cd..cd06eaa230 100644 --- a/internal/memdb/memdb_test.go +++ b/internal/memdb/memdb_test.go @@ -22,50 +22,27 @@ func TestPlugin(t *testing.T) { ) } -// func TestPluginOnNewError(t *testing.T) { -// ctx := context.Background() -// p := plugin.NewPlugin("test", "development", NewMemDBClientErrOnNew) -// err := p.Init(ctx, nil) +func TestPluginOnNewError(t *testing.T) { + ctx := context.Background() + p := plugin.NewPlugin("test", "development", NewMemDBClientErrOnNew) + err := p.Init(ctx, nil) -// if err == nil { -// t.Fatal("expected error") -// } -// } + if err == nil { + t.Fatal("expected error") + } +} -// func TestOnWriteError(t *testing.T) { -// ctx := context.Background() -// newClientFunc := GetNewClient(WithErrOnWrite()) -// p := plugin.NewPlugin("test", "development", newClientFunc) -// if err := p.Init(ctx, nil); err != nil { -// t.Fatal(err) -// } -// table := schema.TestTable("test", schema.TestSourceOptions{}) -// tables := schema.Tables{ -// table, -// } -// sourceName := "TestDestinationOnWriteError" -// syncTime := time.Now() -// sourceSpec := pbPlugin.Spec{ -// Name: sourceName, -// } -// ch := make(chan arrow.Record, 1) -// opts := schema.GenTestDataOptions{ -// SourceName: "test", -// SyncTime: time.Now(), -// MaxRows: 1, -// StableUUID: uuid.Nil, -// } -// record := schema.GenTestData(table, opts)[0] -// ch <- record -// close(ch) -// err := p.Write(ctx, sourceSpec, tables, syncTime, ch) -// if err == nil { -// t.Fatal("expected error") -// } -// if err.Error() != "errOnWrite" { -// t.Fatalf("expected errOnWrite, got %s", err.Error()) -// } -// } +func TestOnWriteError(t *testing.T) { + ctx := context.Background() + newClientFunc := GetNewClient(WithErrOnWrite()) + p := plugin.NewPlugin("test", "development", newClientFunc) + if err := p.Init(ctx, nil); err != nil { + t.Fatal(err) + } + if err := p.WriteAll(ctx, plugin.WriteOptions{}, nil); err.Error() != "errOnWrite" { + t.Fatalf("expected errOnWrite, got %s", err) + } +} // func TestOnWriteCtxCancelled(t *testing.T) { // ctx := context.Background() diff --git a/internal/servers/plugin/v3/plugin.go b/internal/servers/plugin/v3/plugin.go index 0d8a2d809d..d240fac373 100644 --- a/internal/servers/plugin/v3/plugin.go +++ b/internal/servers/plugin/v3/plugin.go @@ -152,6 +152,9 @@ func (s *Server) Sync(req *pb.Sync_Request, stream pb.Plugin_SyncServer) error { s.Logger.Error().Int("bytes", size).Msg("Message exceeds max size") continue } + if err := stream.Send(pbMsg); err != nil { + return status.Errorf(codes.Internal, "failed to send message: %v", err) + } } return syncErr diff --git a/internal/servers/plugin/v3/plugin_test.go b/internal/servers/plugin/v3/plugin_test.go new file mode 100644 index 0000000000..8c4fb8b67f --- /dev/null +++ b/internal/servers/plugin/v3/plugin_test.go @@ -0,0 +1,52 @@ +package plugin + +import ( + "context" + "testing" + + pb "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" + "github.com/cloudquery/plugin-sdk/v4/internal/memdb" + "github.com/cloudquery/plugin-sdk/v4/plugin" +) + +func TestGetName(t *testing.T) { + ctx := context.Background() + s := Server{ + Plugin: plugin.NewPlugin("test", "development", memdb.NewMemDBClient), + } + res, err := s.GetName(ctx, &pb.GetName_Request{}) + if err != nil { + t.Fatal(err) + } + if res.Name != "test" { + t.Fatalf("expected test, got %s", res.GetName()) + } +} + +func TestGetVersion(t *testing.T) { + ctx := context.Background() + s := Server{ + Plugin: plugin.NewPlugin("test", "development", memdb.NewMemDBClient), + } + resVersion, err := s.GetVersion(ctx, &pb.GetVersion_Request{}) + if err != nil { + t.Fatal(err) + } + if resVersion.Version != "development" { + t.Fatalf("expected development, got %s", resVersion.GetVersion()) + } +} + +func TestPluginSync(t *testing.T) { + ctx := context.Background() + s := Server{ + Plugin: plugin.NewPlugin("test", "development", memdb.NewMemDBClient), + } + + _, err := s.Init(ctx, &pb.Init_Request{}) + if err != nil { + t.Fatal(err) + } + + // err = s.Sync(&pb.Sync_Request{}, ) +} From 64995a0f7799648859aa9f0472357323a9751942 Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Sat, 24 Jun 2023 11:54:15 +0300 Subject: [PATCH 116/125] add more tests --- internal/servers/plugin/v3/plugin_test.go | 143 +++++++++++++++++++++- 1 file changed, 142 insertions(+), 1 deletion(-) diff --git a/internal/servers/plugin/v3/plugin_test.go b/internal/servers/plugin/v3/plugin_test.go index 8c4fb8b67f..4b03fc53dd 100644 --- a/internal/servers/plugin/v3/plugin_test.go +++ b/internal/servers/plugin/v3/plugin_test.go @@ -2,11 +2,18 @@ package plugin import ( "context" + "io" "testing" + "github.com/apache/arrow/go/v13/arrow" + "github.com/apache/arrow/go/v13/arrow/array" + "github.com/apache/arrow/go/v13/arrow/memory" pb "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" "github.com/cloudquery/plugin-sdk/v4/internal/memdb" "github.com/cloudquery/plugin-sdk/v4/plugin" + "github.com/cloudquery/plugin-sdk/v4/schema" + "google.golang.org/grpc" + "google.golang.org/grpc/metadata" ) func TestGetName(t *testing.T) { @@ -37,6 +44,68 @@ func TestGetVersion(t *testing.T) { } } +type mockSyncServer struct { + grpc.ServerStream + messages []*pb.Sync_Response +} + +func (s *mockSyncServer) Send(*pb.Sync_Response) error { + s.messages = append(s.messages, &pb.Sync_Response{}) + return nil +} + +func (*mockSyncServer) SetHeader(metadata.MD) error { + return nil +} +func (*mockSyncServer) SendHeader(metadata.MD) error { + return nil +} +func (*mockSyncServer) SetTrailer(metadata.MD) { +} +func (*mockSyncServer) Context() context.Context { + return context.Background() +} +func (*mockSyncServer) SendMsg(any) error { + return nil +} +func (*mockSyncServer) RecvMsg(any) error { + return nil +} + +type mockWriteServer struct { + grpc.ServerStream + messages []*pb.Write_Request +} + +func (*mockWriteServer) SendAndClose(*pb.Write_Response) error { + return nil +} +func (s *mockWriteServer) Recv() (*pb.Write_Request, error) { + if len(s.messages) > 0 { + msg := s.messages[0] + s.messages = s.messages[1:] + return msg, nil + } + return nil, io.EOF +} +func (*mockWriteServer) SetHeader(metadata.MD) error { + return nil +} +func (*mockWriteServer) SendHeader(metadata.MD) error { + return nil +} +func (*mockWriteServer) SetTrailer(metadata.MD) { +} +func (*mockWriteServer) Context() context.Context { + return context.Background() +} +func (*mockWriteServer) SendMsg(any) error { + return nil +} +func (*mockWriteServer) RecvMsg(any) error { + return nil +} + func TestPluginSync(t *testing.T) { ctx := context.Background() s := Server{ @@ -48,5 +117,77 @@ func TestPluginSync(t *testing.T) { t.Fatal(err) } - // err = s.Sync(&pb.Sync_Request{}, ) + streamSyncServer := &mockSyncServer{} + if err := s.Sync(&pb.Sync_Request{}, streamSyncServer); err != nil { + t.Fatal(err) + } + if len(streamSyncServer.messages) != 0 { + t.Fatalf("expected 0 messages, got %d", len(streamSyncServer.messages)) + } + writeMockServer := &mockWriteServer{} + + if err := s.Write(writeMockServer); err == nil { + t.Fatal("expected error, got nil") + } + table := &schema.Table{ + Name: "test", + Columns: []schema.Column{ + { + Name: "test", + Type: arrow.BinaryTypes.String, + }, + }, + } + sc := table.ToArrowSchema() + b, err := pb.SchemaToBytes(sc) + if err != nil { + t.Fatal(err) + } + bldr := array.NewRecordBuilder(memory.DefaultAllocator, sc) + bldr.Field(0).(*array.StringBuilder).Append("test") + record := bldr.NewRecord() + recordBytes, err := pb.RecordToBytes(record) + if err != nil { + t.Fatal(err) + } + + writeMockServer.messages = []*pb.Write_Request{ + { + Message: &pb.Write_Request_Options{ + Options: &pb.WriteOptions{}, + }, + }, + { + Message: &pb.Write_Request_MigrateTable{ + MigrateTable: &pb.MessageMigrateTable{ + Table: b, + }, + }, + }, + { + Message: &pb.Write_Request_Insert{ + Insert: &pb.MessageInsert{ + Record: recordBytes, + }, + }, + }, + } + + if err := s.Write(writeMockServer); err != nil { + t.Fatal(err) + } + + streamSyncServer = &mockSyncServer{} + if err := s.Sync(&pb.Sync_Request{ + Tables: []string{"*"}, + }, streamSyncServer); err != nil { + t.Fatal(err) + } + if len(streamSyncServer.messages) != 1 { + t.Fatalf("expected 1 message, got %d", len(streamSyncServer.messages)) + } + + if _, err := s.Close(ctx, &pb.Close_Request{}); err != nil { + t.Fatal(err) + } } From f522ea0e2b16890cc2bd9cc3a4dbb0f369cbf48b Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Sat, 24 Jun 2023 12:25:35 +0300 Subject: [PATCH 117/125] more tests --- .../destination/v1/destination_test.go | 38 +++++++++++++++++++ .../servers/discovery/v0/discovery_test.go | 28 ++++++++++++++ .../servers/discovery/v1/discovery_test.go | 28 ++++++++++++++ 3 files changed, 94 insertions(+) create mode 100644 internal/servers/destination/v1/destination_test.go create mode 100644 internal/servers/discovery/v0/discovery_test.go create mode 100644 internal/servers/discovery/v1/discovery_test.go diff --git a/internal/servers/destination/v1/destination_test.go b/internal/servers/destination/v1/destination_test.go new file mode 100644 index 0000000000..8fd6accc9c --- /dev/null +++ b/internal/servers/destination/v1/destination_test.go @@ -0,0 +1,38 @@ +package destination + +import ( + "context" + "testing" + + pb "github.com/cloudquery/plugin-pb-go/pb/destination/v1" + "github.com/cloudquery/plugin-sdk/v4/internal/memdb" + "github.com/cloudquery/plugin-sdk/v4/plugin" +) + +func TestGetName(t *testing.T) { + ctx := context.Background() + s := Server{ + Plugin: plugin.NewPlugin("test", "development", memdb.NewMemDBClient), + } + res, err := s.GetName(ctx, &pb.GetName_Request{}) + if err != nil { + t.Fatal(err) + } + if res.Name != "test" { + t.Fatalf("expected test, got %s", res.GetName()) + } +} + +func TestGetVersion(t *testing.T) { + ctx := context.Background() + s := Server{ + Plugin: plugin.NewPlugin("test", "development", memdb.NewMemDBClient), + } + resVersion, err := s.GetVersion(ctx, &pb.GetVersion_Request{}) + if err != nil { + t.Fatal(err) + } + if resVersion.Version != "development" { + t.Fatalf("expected development, got %s", resVersion.GetVersion()) + } +} diff --git a/internal/servers/discovery/v0/discovery_test.go b/internal/servers/discovery/v0/discovery_test.go new file mode 100644 index 0000000000..0eaab884d7 --- /dev/null +++ b/internal/servers/discovery/v0/discovery_test.go @@ -0,0 +1,28 @@ +package discovery + +import ( + "context" + "testing" + + pb "github.com/cloudquery/plugin-pb-go/pb/discovery/v0" +) + +func TestDiscovery(t *testing.T) { + ctx := context.Background() + s := &Server{ + Versions: []string{"1", "2"}, + } + resp, err := s.GetVersions(ctx, &pb.GetVersions_Request{}) + if err != nil { + t.Fatal(err) + } + if len(resp.Versions) != 2 { + t.Fatal("expected 2 versions") + } + if resp.Versions[0] != "1" { + t.Fatal("expected version 1") + } + if resp.Versions[1] != "2" { + t.Fatal("expected version 2") + } +} diff --git a/internal/servers/discovery/v1/discovery_test.go b/internal/servers/discovery/v1/discovery_test.go new file mode 100644 index 0000000000..a54b24c746 --- /dev/null +++ b/internal/servers/discovery/v1/discovery_test.go @@ -0,0 +1,28 @@ +package discovery + +import ( + "context" + "testing" + + pb "github.com/cloudquery/plugin-pb-go/pb/discovery/v1" +) + +func TestDiscovery(t *testing.T) { + ctx := context.Background() + s := &Server{ + Versions: []int32{1, 2}, + } + resp, err := s.GetVersions(ctx, &pb.GetVersions_Request{}) + if err != nil { + t.Fatal(err) + } + if len(resp.Versions) != 2 { + t.Fatal("expected 2 versions") + } + if resp.Versions[0] != 1 { + t.Fatal("expected version 1") + } + if resp.Versions[1] != 2 { + t.Fatal("expected version 2") + } +} From 908fce14aec528b2d024a54171b77cb462acae83 Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Sat, 24 Jun 2023 12:26:59 +0300 Subject: [PATCH 118/125] remove buf.yaml --- buf.yaml | 12 ------------ 1 file changed, 12 deletions(-) delete mode 100644 buf.yaml diff --git a/buf.yaml b/buf.yaml deleted file mode 100644 index b348cd312c..0000000000 --- a/buf.yaml +++ /dev/null @@ -1,12 +0,0 @@ -version: v1 -breaking: - use: - - FILE -lint: - use: - - BASIC - ignore: - # We are ignoring those as this is an old version and we are not doing any changes here anymore - - cloudquery/destination/v0/destination.proto - - cloudquery/source/v0/source.proto - - cloudquery/base/v0/base.proto From 12f41ffdd6923edab598a7fcc2a9e49d42136582 Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Sat, 24 Jun 2023 13:07:29 +0300 Subject: [PATCH 119/125] more tests --- go.mod | 2 +- go.sum | 4 +- .../destination/v1/destination_test.go | 114 ++++++++++++++++++ 3 files changed, 117 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index c262d9a980..4475f91509 100644 --- a/go.mod +++ b/go.mod @@ -5,7 +5,7 @@ go 1.19 require ( github.com/apache/arrow/go/v13 v13.0.0-20230622042343-ec413b7763fe github.com/bradleyjkemp/cupaloy/v2 v2.8.0 - github.com/cloudquery/plugin-pb-go v1.3.2 + github.com/cloudquery/plugin-pb-go v1.3.3 github.com/cloudquery/plugin-sdk/v2 v2.7.0 github.com/getsentry/sentry-go v0.20.0 github.com/goccy/go-json v0.10.0 diff --git a/go.sum b/go.sum index ed14b152b7..e0145b6d6f 100644 --- a/go.sum +++ b/go.sum @@ -49,8 +49,8 @@ github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMn github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/cloudquery/arrow/go/v13 v13.0.0-20230610001216-0f7bd3beda2c h1:nQSB4v0QxCW5XDLvVBcaNrsJ+J/esMBoFYjymllxM1E= github.com/cloudquery/arrow/go/v13 v13.0.0-20230610001216-0f7bd3beda2c/go.mod h1:W69eByFNO0ZR30q1/7Sr9d83zcVZmF2MiP3fFYAWJOc= -github.com/cloudquery/plugin-pb-go v1.3.2 h1:q/REJeRr5zyyNUZMcvE43+X7hV+zjzWqLFlWWOnWbvs= -github.com/cloudquery/plugin-pb-go v1.3.2/go.mod h1:NbWAtT2BzJQ9+XUWwh3IKBg3MOeV9ZEpHoHNAQ/YDV8= +github.com/cloudquery/plugin-pb-go v1.3.3 h1:+4y6hIDm2NtV0ohzgrWzXMaaOTanh7nmDpDgkFtgBlg= +github.com/cloudquery/plugin-pb-go v1.3.3/go.mod h1:NbWAtT2BzJQ9+XUWwh3IKBg3MOeV9ZEpHoHNAQ/YDV8= github.com/cloudquery/plugin-sdk/v2 v2.7.0 h1:hRXsdEiaOxJtsn/wZMFQC9/jPfU1MeMK3KF+gPGqm7U= github.com/cloudquery/plugin-sdk/v2 v2.7.0/go.mod h1:pAX6ojIW99b/Vg4CkhnsGkRIzNaVEceYMR+Bdit73ug= github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= diff --git a/internal/servers/destination/v1/destination_test.go b/internal/servers/destination/v1/destination_test.go index 8fd6accc9c..9d398f0599 100644 --- a/internal/servers/destination/v1/destination_test.go +++ b/internal/servers/destination/v1/destination_test.go @@ -2,11 +2,21 @@ package destination import ( "context" + "encoding/json" + "io" "testing" + "github.com/apache/arrow/go/v13/arrow" + "github.com/apache/arrow/go/v13/arrow/array" + "github.com/apache/arrow/go/v13/arrow/memory" pb "github.com/cloudquery/plugin-pb-go/pb/destination/v1" + pbSource "github.com/cloudquery/plugin-pb-go/pb/source/v2" + "github.com/cloudquery/plugin-pb-go/specs" "github.com/cloudquery/plugin-sdk/v4/internal/memdb" "github.com/cloudquery/plugin-sdk/v4/plugin" + "github.com/cloudquery/plugin-sdk/v4/schema" + "google.golang.org/grpc" + "google.golang.org/grpc/metadata" ) func TestGetName(t *testing.T) { @@ -36,3 +46,107 @@ func TestGetVersion(t *testing.T) { t.Fatalf("expected development, got %s", resVersion.GetVersion()) } } + +type mockWriteServer struct { + grpc.ServerStream + messages []*pb.Write_Request +} + +func (*mockWriteServer) SendAndClose(*pb.Write_Response) error { + return nil +} +func (s *mockWriteServer) Recv() (*pb.Write_Request, error) { + if len(s.messages) > 0 { + msg := s.messages[0] + s.messages = s.messages[1:] + return msg, nil + } + return nil, io.EOF +} +func (*mockWriteServer) SetHeader(metadata.MD) error { + return nil +} +func (*mockWriteServer) SendHeader(metadata.MD) error { + return nil +} +func (*mockWriteServer) SetTrailer(metadata.MD) { +} +func (*mockWriteServer) Context() context.Context { + return context.Background() +} +func (*mockWriteServer) SendMsg(any) error { + return nil +} +func (*mockWriteServer) RecvMsg(any) error { + return nil +} + +func TestPluginSync(t *testing.T) { + ctx := context.Background() + s := Server{ + Plugin: plugin.NewPlugin("test", "development", memdb.NewMemDBClient), + } + destinationSpec := specs.Destination{ + Name: "test", + } + destinationSpecBytes, err := json.Marshal(destinationSpec) + if err != nil { + t.Fatal(err) + } + _, err = s.Configure(ctx, &pb.Configure_Request{ + Config: destinationSpecBytes, + }) + if err != nil { + t.Fatal(err) + } + + writeMockServer := &mockWriteServer{} + if err := s.Write(writeMockServer); err != nil { + t.Fatal(err) + } + table := &schema.Table{ + Name: "test", + Columns: []schema.Column{ + { + Name: "test", + Type: arrow.BinaryTypes.String, + }, + }, + } + schemas := schema.Tables{table}.ToArrowSchemas() + schemaBytes, err := pbSource.SchemasToBytes(schemas) + if err != nil { + t.Fatal(err) + } + sc := table.ToArrowSchema() + bldr := array.NewRecordBuilder(memory.DefaultAllocator, sc) + bldr.Field(0).(*array.StringBuilder).Append("test") + record := bldr.NewRecord() + recordBytes, err := pbSource.RecordToBytes(record) + if err != nil { + t.Fatal(err) + } + + sourceSpec := specs.Source{ + Name: "source_test", + } + sourceSpecBytes, err := json.Marshal(sourceSpec) + if err != nil { + t.Fatal(err) + } + + writeMockServer.messages = []*pb.Write_Request{ + { + Tables: schemaBytes, + Resource: recordBytes, + SourceSpec: sourceSpecBytes, + }, + } + if err := s.Write(writeMockServer); err != nil { + t.Fatal(err) + } + + if _, err := s.Close(ctx, &pb.Close_Request{}); err != nil { + t.Fatal(err) + } +} From 9486b508539a1c012159789ba5211b2460ff78ac Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Sat, 24 Jun 2023 19:13:48 +0300 Subject: [PATCH 120/125] remove empty files --- helpers/internal_columns.go | 1 - 1 file changed, 1 deletion(-) delete mode 100644 helpers/internal_columns.go diff --git a/helpers/internal_columns.go b/helpers/internal_columns.go deleted file mode 100644 index 345b806ac4..0000000000 --- a/helpers/internal_columns.go +++ /dev/null @@ -1 +0,0 @@ -package helpers From 0c2171192d1b8e4e5bdb694ba9a881e4a33710e5 Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Sat, 24 Jun 2023 19:24:30 +0300 Subject: [PATCH 121/125] nits --- internal/servers/plugin/v3/plugin.go | 3 ++- internal/servers/plugin/v3/state.go | 4 +++- schema/table.go | 11 ----------- serve/plugin_test.go | 3 ++- 4 files changed, 7 insertions(+), 14 deletions(-) diff --git a/internal/servers/plugin/v3/plugin.go b/internal/servers/plugin/v3/plugin.go index d240fac373..bd8c4f3581 100644 --- a/internal/servers/plugin/v3/plugin.go +++ b/internal/servers/plugin/v3/plugin.go @@ -132,7 +132,8 @@ func (s *Server) Sync(req *pb.Sync_Request, stream pb.Plugin_SyncServer) error { }, } case *message.DeleteStale: - tableBytes, err := m.Table.ToArrowSchemaBytes() + sc := m.Table.ToArrowSchema() + tableBytes, err := pb.SchemaToBytes(sc) if err != nil { return status.Errorf(codes.Internal, "failed to encode record: %v", err) } diff --git a/internal/servers/plugin/v3/state.go b/internal/servers/plugin/v3/state.go index 85e118570e..d7cff60018 100644 --- a/internal/servers/plugin/v3/state.go +++ b/internal/servers/plugin/v3/state.go @@ -11,6 +11,7 @@ import ( "github.com/apache/arrow/go/v13/arrow/ipc" "github.com/apache/arrow/go/v13/arrow/memory" pbDiscovery "github.com/cloudquery/plugin-pb-go/pb/discovery/v1" + pb "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" "github.com/cloudquery/plugin-sdk/v4/schema" "github.com/cloudquery/plugin-sdk/v4/state" @@ -60,7 +61,8 @@ func newStateClient(ctx context.Context, conn *grpc.ClientConn, spec *pbPlugin.S }, }, } - tableBytes, err := table.ToArrowSchemaBytes() + sc := table.ToArrowSchema() + tableBytes, err := pb.SchemaToBytes(sc) if err != nil { return nil, err } diff --git a/schema/table.go b/schema/table.go index a342ff1ee0..c7680a32c0 100644 --- a/schema/table.go +++ b/schema/table.go @@ -1,13 +1,11 @@ package schema import ( - "bytes" "context" "fmt" "regexp" "github.com/apache/arrow/go/v13/arrow" - "github.com/apache/arrow/go/v13/arrow/ipc" "github.com/cloudquery/plugin-sdk/v4/glob" "golang.org/x/exp/slices" ) @@ -367,15 +365,6 @@ func (t *Table) PrimaryKeysIndexes() []int { return primaryKeys } -func (t *Table) ToArrowSchemaBytes() ([]byte, error) { - sc := t.ToArrowSchema() - var buf bytes.Buffer - wr := ipc.NewWriter(&buf, ipc.WithSchema(sc)) - if err := wr.Close(); err != nil { - return nil, err - } - return buf.Bytes(), nil -} func (t *Table) ToArrowSchema() *arrow.Schema { fields := make([]arrow.Field, len(t.Columns)) diff --git a/serve/plugin_test.go b/serve/plugin_test.go index ad91d862f6..c648d53976 100644 --- a/serve/plugin_test.go +++ b/serve/plugin_test.go @@ -100,7 +100,8 @@ func TestPluginServe(t *testing.T) { if err != nil { t.Fatal(err) } - tableBytes, err := testTable.ToArrowSchemaBytes() + sc := testTable.ToArrowSchema() + tableBytes, err := pb.SchemaToBytes(sc) if err != nil { t.Fatal(err) } From d3cd5aad7dec0873d69bc970857ccb637f6bcbf7 Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Sun, 25 Jun 2023 10:45:10 +0300 Subject: [PATCH 122/125] make state it's own package --- internal/clients/state/v3/state.go | 183 ++++++++++++++++++++++++ internal/clients/state/v3/state_test.go | 3 + internal/servers/plugin/v3/plugin.go | 24 ---- internal/servers/plugin/v3/state.go | 175 ---------------------- plugin/plugin_destination.go | 4 + plugin/plugin_source.go | 5 +- serve/state_test.go | 83 +++++++++++ state/state.go | 23 ++- 8 files changed, 298 insertions(+), 202 deletions(-) create mode 100644 internal/clients/state/v3/state.go create mode 100644 internal/clients/state/v3/state_test.go delete mode 100644 internal/servers/plugin/v3/state.go create mode 100644 serve/state_test.go diff --git a/internal/clients/state/v3/state.go b/internal/clients/state/v3/state.go new file mode 100644 index 0000000000..fc713ed9a2 --- /dev/null +++ b/internal/clients/state/v3/state.go @@ -0,0 +1,183 @@ +package state + +import ( + "bytes" + "context" + "fmt" + "io" + "sync" + + "github.com/apache/arrow/go/v13/arrow" + "github.com/apache/arrow/go/v13/arrow/array" + "github.com/apache/arrow/go/v13/arrow/ipc" + "github.com/apache/arrow/go/v13/arrow/memory" + pb "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" + "github.com/cloudquery/plugin-sdk/v4/schema" +) + +const keyColumn = "key" +const valueColumn = "value" + +type Client struct { + client pb.PluginClient + tableName string + mem map[string]string + mutex *sync.RWMutex + keys []string + values []string + schema *arrow.Schema +} + +func NewClient(ctx context.Context, pbClient pb.PluginClient, tableName string) (*Client, error) { + c := &Client{ + client: pbClient, + tableName: tableName, + mem: make(map[string]string), + mutex: &sync.RWMutex{}, + keys: make([]string, 0), + values: make([]string, 0), + } + table := &schema.Table{ + Name: tableName, + Columns: []schema.Column{ + { + Name: keyColumn, + Type: arrow.BinaryTypes.String, + PrimaryKey: true, + }, + { + Name: valueColumn, + Type: arrow.BinaryTypes.String, + }, + }, + } + sc := table.ToArrowSchema() + c.schema = sc + tableBytes, err := pb.SchemaToBytes(sc) + if err != nil { + return nil, err + } + + writeClient, err := c.client.Write(ctx) + if err != nil { + return nil, err + } + if err := writeClient.Send(&pb.Write_Request{ + Message: &pb.Write_Request_Options{ + Options: &pb.WriteOptions{MigrateForce: false}, + }, + }); err != nil { + return nil, err + } + if err := writeClient.Send(&pb.Write_Request{ + Message: &pb.Write_Request_MigrateTable{ + MigrateTable: &pb.MessageMigrateTable{ + Table: tableBytes, + }, + }, + }); err != nil { + return nil, err + } + + syncClient, err := c.client.Sync(ctx, &pb.Sync_Request{ + Tables: []string{tableName}, + }) + if err != nil { + return nil, err + } + c.mutex.Lock() + defer c.mutex.Unlock() + for { + res, err := syncClient.Recv() + if err != nil { + if err == io.EOF { + break + } + return nil, err + } + var insertMessage *pb.Sync_Response_Insert + switch m := res.Message.(type) { + case *pb.Sync_Response_Delete: + continue + case *pb.Sync_Response_MigrateTable: + continue + case *pb.Sync_Response_Insert: + insertMessage = m + } + rdr, err := ipc.NewReader(bytes.NewReader(insertMessage.Insert.Record)) + if err != nil { + return nil, err + } + for { + record, err := rdr.Read() + if err != nil { + if err == io.EOF { + break + } + return nil, err + } + if record.NumRows() == 0 { + continue + } + keys := record.Columns()[0].(*array.String) + values := record.Columns()[1].(*array.String) + for i := 0; i < keys.Len(); i++ { + c.mem[keys.Value(i)] = values.Value(i) + } + } + } + return c, nil +} + +func (c *Client) SetKey(_ context.Context, key string, value string) error { + c.mutex.Lock() + defer c.mutex.Unlock() + c.mem[key] = value + return nil +} + +func (c *Client) Flush(ctx context.Context) error { + c.mutex.RLock() + defer c.mutex.RUnlock() + bldr := array.NewRecordBuilder(memory.DefaultAllocator, c.schema) + for k, v := range c.mem { + bldr.Field(0).(*array.StringBuilder).Append(k) + bldr.Field(1).(*array.StringBuilder).Append(v) + } + rec := bldr.NewRecord() + recordBytes, err := pb.RecordToBytes(rec) + if err != nil { + return err + } + writeClient, err := c.client.Write(ctx) + if err != nil { + return err + } + if err := writeClient.Send(&pb.Write_Request{ + Message: &pb.Write_Request_Options{}, + }); err != nil { + return err + } + if err := writeClient.Send(&pb.Write_Request{ + Message: &pb.Write_Request_Insert{ + Insert: &pb.MessageInsert{ + Record: recordBytes, + }, + }, + }); err != nil { + return err + } + if _, err := writeClient.CloseAndRecv(); err != nil { + return err + } + return nil +} + +func (c *Client) GetKey(_ context.Context, key string) (string, error) { + c.mutex.RLock() + defer c.mutex.RUnlock() + if val, ok := c.mem[key]; ok { + return val, nil + } + return "", fmt.Errorf("key not found") +} diff --git a/internal/clients/state/v3/state_test.go b/internal/clients/state/v3/state_test.go new file mode 100644 index 0000000000..ab446d55fc --- /dev/null +++ b/internal/clients/state/v3/state_test.go @@ -0,0 +1,3 @@ +package state + +// Note: State is tested under serve/state_test.go with a real plugin server. diff --git a/internal/servers/plugin/v3/plugin.go b/internal/servers/plugin/v3/plugin.go index bd8c4f3581..b424b0bd26 100644 --- a/internal/servers/plugin/v3/plugin.go +++ b/internal/servers/plugin/v3/plugin.go @@ -5,7 +5,6 @@ import ( "fmt" "io" - "github.com/cloudquery/plugin-pb-go/managedplugin" pb "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" "github.com/cloudquery/plugin-sdk/v4/message" "github.com/cloudquery/plugin-sdk/v4/plugin" @@ -75,29 +74,6 @@ func (s *Server) Sync(req *pb.Sync_Request, stream pb.Plugin_SyncServer) error { SkipTables: req.SkipTables, } - if req.StateBackend != nil { - opts := []managedplugin.Option{ - managedplugin.WithLogger(s.Logger), - managedplugin.WithDirectory(s.Directory), - } - if s.NoSentry { - opts = append(opts, managedplugin.WithNoSentry()) - } - statePlugin, err := managedplugin.NewClient(ctx, managedplugin.PluginDestination, managedplugin.Config{ - Path: req.StateBackend.Path, - Registry: managedplugin.Registry(req.StateBackend.Registry), - Version: req.StateBackend.Version, - }, opts...) - if err != nil { - return status.Errorf(codes.Internal, "failed to create state plugin: %v", err) - } - stateClient, err := newStateClient(ctx, statePlugin.Conn, req.StateBackend) - if err != nil { - return status.Errorf(codes.Internal, "failed to create state client: %v", err) - } - syncOptions.StateBackend = stateClient - } - go func() { defer close(msgs) err := s.Plugin.Sync(ctx, syncOptions, msgs) diff --git a/internal/servers/plugin/v3/state.go b/internal/servers/plugin/v3/state.go deleted file mode 100644 index d7cff60018..0000000000 --- a/internal/servers/plugin/v3/state.go +++ /dev/null @@ -1,175 +0,0 @@ -package plugin - -import ( - "bytes" - "context" - "fmt" - "io" - - "github.com/apache/arrow/go/v13/arrow" - "github.com/apache/arrow/go/v13/arrow/array" - "github.com/apache/arrow/go/v13/arrow/ipc" - "github.com/apache/arrow/go/v13/arrow/memory" - pbDiscovery "github.com/cloudquery/plugin-pb-go/pb/discovery/v1" - pb "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" - pbPlugin "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" - "github.com/cloudquery/plugin-sdk/v4/schema" - "github.com/cloudquery/plugin-sdk/v4/state" - "golang.org/x/exp/slices" - "google.golang.org/grpc" -) - -const stateTablePrefix = "cq_state_" -const keyColumn = "key" -const valueColumn = "value" - -type ClientV3 struct { - client pbPlugin.PluginClient - mem map[string]string - keys []string - values []string -} - -func newStateClient(ctx context.Context, conn *grpc.ClientConn, spec *pbPlugin.StateBackendSpec) (state.Client, error) { - discoveryClient := pbDiscovery.NewDiscoveryClient(conn) - versions, err := discoveryClient.GetVersions(ctx, &pbDiscovery.GetVersions_Request{}) - if err != nil { - return nil, err - } - if !slices.Contains(versions.Versions, 3) { - return nil, fmt.Errorf("please upgrade your state backend plugin") - } - - c := &ClientV3{ - client: pbPlugin.NewPluginClient(conn), - mem: make(map[string]string), - keys: make([]string, 0), - values: make([]string, 0), - } - name := spec.Name - table := &schema.Table{ - Name: stateTablePrefix + name, - Columns: []schema.Column{ - { - Name: keyColumn, - Type: arrow.BinaryTypes.String, - PrimaryKey: true, - }, - { - Name: valueColumn, - Type: arrow.BinaryTypes.String, - }, - }, - } - sc := table.ToArrowSchema() - tableBytes, err := pb.SchemaToBytes(sc) - if err != nil { - return nil, err - } - - if _, err := c.client.Init(ctx, &pbPlugin.Init_Request{ - Spec: spec.Spec, - }); err != nil { - return nil, err - } - - writeClient, err := c.client.Write(ctx) - if err != nil { - return nil, err - } - - if err := writeClient.Send(&pbPlugin.Write_Request{ - Message: &pbPlugin.Write_Request_MigrateTable{ - MigrateTable: &pbPlugin.MessageMigrateTable{ - Table: tableBytes, - }, - }, - }); err != nil { - return nil, err - } - - syncClient, err := c.client.Sync(ctx, &pbPlugin.Sync_Request{ - Tables: []string{stateTablePrefix + name}, - }) - if err != nil { - return nil, err - } - for { - res, err := syncClient.Recv() - if err != nil { - if err == io.EOF { - break - } - return nil, err - } - insertMessage := res.GetInsert() - if insertMessage == nil { - return nil, fmt.Errorf("unexpected message type %T", res) - } - rdr, err := ipc.NewReader(bytes.NewReader(insertMessage.Record)) - if err != nil { - return nil, err - } - for { - record, err := rdr.Read() - if err != nil { - if err == io.EOF { - break - } - return nil, err - } - keys := record.Columns()[0].(*array.String) - values := record.Columns()[1].(*array.String) - for i := 0; i < keys.Len(); i++ { - c.mem[keys.Value(i)] = values.Value(i) - } - } - } - return c, nil -} - -func (c *ClientV3) SetKey(_ context.Context, key string, value string) error { - c.mem[key] = value - return nil -} - -func (c *ClientV3) Flush(ctx context.Context) error { - bldr := array.NewRecordBuilder(memory.DefaultAllocator, nil) - for k, v := range c.mem { - bldr.Field(0).(*array.StringBuilder).Append(k) - bldr.Field(1).(*array.StringBuilder).Append(v) - } - rec := bldr.NewRecord() - var buf bytes.Buffer - wrtr := ipc.NewWriter(&buf, ipc.WithSchema(rec.Schema())) - if err := wrtr.Write(rec); err != nil { - return err - } - if err := wrtr.Close(); err != nil { - return err - } - writeClient, err := c.client.Write(ctx) - if err != nil { - return err - } - if err := writeClient.Send(&pbPlugin.Write_Request{ - Message: &pbPlugin.Write_Request_Insert{ - Insert: &pbPlugin.MessageInsert{ - Record: buf.Bytes(), - }, - }, - }); err != nil { - return err - } - if _, err := writeClient.CloseAndRecv(); err != nil { - return err - } - return nil -} - -func (c *ClientV3) GetKey(_ context.Context, key string) (string, error) { - if val, ok := c.mem[key]; ok { - return val, nil - } - return "", fmt.Errorf("key not found") -} diff --git a/plugin/plugin_destination.go b/plugin/plugin_destination.go index 900e040b02..2a1871152d 100644 --- a/plugin/plugin_destination.go +++ b/plugin/plugin_destination.go @@ -2,6 +2,7 @@ package plugin import ( "context" + "fmt" "github.com/apache/arrow/go/v13/arrow" "github.com/cloudquery/plugin-sdk/v4/message" @@ -35,5 +36,8 @@ func (p *Plugin) WriteAll(ctx context.Context, options WriteOptions, resources [ } func (p *Plugin) Write(ctx context.Context, options WriteOptions, res <-chan message.Message) error { + if p.client == nil { + return fmt.Errorf("plugin is not initialized. call Init first") + } return p.client.Write(ctx, options, res) } diff --git a/plugin/plugin_source.go b/plugin/plugin_source.go index 118bece1d8..e5cdf1ad9b 100644 --- a/plugin/plugin_source.go +++ b/plugin/plugin_source.go @@ -8,7 +8,6 @@ import ( "github.com/cloudquery/plugin-sdk/v4/glob" "github.com/cloudquery/plugin-sdk/v4/message" "github.com/cloudquery/plugin-sdk/v4/schema" - "github.com/cloudquery/plugin-sdk/v4/state" "github.com/rs/zerolog" ) @@ -16,7 +15,6 @@ type SyncOptions struct { Tables []string SkipTables []string DeterministicCQID bool - StateBackend state.Client } type SourceClient interface { @@ -96,6 +94,9 @@ func (p *Plugin) Sync(ctx context.Context, options SyncOptions, res chan<- messa return fmt.Errorf("plugin already in use") } defer p.mu.Unlock() + if p.client == nil { + return fmt.Errorf("plugin not initialized. call Init() first") + } // startTime := time.Now() if err := p.client.Sync(ctx, options, res); err != nil { diff --git a/serve/state_test.go b/serve/state_test.go new file mode 100644 index 0000000000..14cf2aa90d --- /dev/null +++ b/serve/state_test.go @@ -0,0 +1,83 @@ +package serve + +import ( + "context" + "sync" + "testing" + + pb "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" + "github.com/cloudquery/plugin-sdk/v4/internal/clients/state/v3" + "github.com/cloudquery/plugin-sdk/v4/internal/memdb" + "github.com/cloudquery/plugin-sdk/v4/plugin" + "google.golang.org/grpc" + "google.golang.org/grpc/credentials/insecure" +) + +func TestState(t *testing.T) { + p := plugin.NewPlugin( + "testPluginV3", + "v1.0.0", + memdb.NewMemDBClient) + srv := Plugin(p, WithArgs("serve"), WithTestListener()) + ctx := context.Background() + ctx, cancel := context.WithCancel(ctx) + var wg sync.WaitGroup + wg.Add(1) + var serverErr error + go func() { + defer wg.Done() + serverErr = srv.Serve(ctx) + }() + defer func() { + cancel() + wg.Wait() + }() + + // https://stackoverflow.com/questions/42102496/testing-a-grpc-service + conn, err := grpc.DialContext(ctx, "bufnet", grpc.WithContextDialer(srv.bufPluginDialer), grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithBlock()) + if err != nil { + t.Fatalf("Failed to dial bufnet: %v", err) + } + + c := pb.NewPluginClient(conn) + if _, err := c.Init(ctx, &pb.Init_Request{}); err != nil { + t.Fatal(err) + } + stateClient, err := state.NewClient(ctx, c, "test") + if err != nil { + t.Fatal(err) + } + + if err := stateClient.SetKey(ctx, "key", "value"); err != nil { + t.Fatal(err) + } + + val, err := stateClient.GetKey(ctx, "key") + if err != nil { + t.Fatal(err) + } + if val != "value" { + t.Fatalf("expected value to be value but got %s", val) + } + + if err := stateClient.Flush(ctx); err != nil { + t.Fatal(err) + } + stateClient, err = state.NewClient(ctx, c, "test") + if err != nil { + t.Fatal(err) + } + val, err = stateClient.GetKey(ctx, "key") + if err != nil { + t.Fatal(err) + } + if val != "value" { + t.Fatalf("expected value to be value but got %s", val) + } + + cancel() + wg.Wait() + if serverErr != nil { + t.Fatal(serverErr) + } +} diff --git a/state/state.go b/state/state.go index 55f070704e..d90b595ef2 100644 --- a/state/state.go +++ b/state/state.go @@ -1,8 +1,29 @@ package state -import "context" +import ( + "context" + "fmt" + + pbDiscovery "github.com/cloudquery/plugin-pb-go/pb/discovery/v1" + pbPluginV3 "github.com/cloudquery/plugin-pb-go/pb/plugin/v3" + stateV3 "github.com/cloudquery/plugin-sdk/v4/internal/clients/state/v3" + "golang.org/x/exp/slices" + "google.golang.org/grpc" +) type Client interface { SetKey(ctx context.Context, key string, value string) error GetKey(ctx context.Context, key string) (string, error) } + +func NewClient(ctx context.Context, conn *grpc.ClientConn, tableName string) (Client, error) { + discoveryClient := pbDiscovery.NewDiscoveryClient(conn) + versions, err := discoveryClient.GetVersions(ctx, &pbDiscovery.GetVersions_Request{}) + if err != nil { + return nil, err + } + if slices.Contains(versions.Versions, 3) { + return stateV3.NewClient(ctx, pbPluginV3.NewPluginClient(conn), tableName) + } + return nil, fmt.Errorf("please upgrade your state backend plugin. state supporting version 3 plugin has %v", versions.Versions) +} From 7dc094b44da78c3e55f2d6e9c042c6df1ee337aa Mon Sep 17 00:00:00 2001 From: Kemal Hadimli Date: Mon, 26 Jun 2023 09:39:52 +0100 Subject: [PATCH 123/125] batchWriter worker: Fix for batch size overflow check --- writers/batch.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/writers/batch.go b/writers/batch.go index 1b5c3b57c3..510418c9bb 100644 --- a/writers/batch.go +++ b/writers/batch.go @@ -135,7 +135,7 @@ func (w *BatchWriter) worker(ctx context.Context, tableName string, ch <-chan *m resources = append(resources, r) sizeBytes += util.TotalRecordSize(r.Record) - if len(resources) >= w.batchSize || sizeBytes+util.TotalRecordSize(r.Record) >= int64(w.batchSizeBytes) { + if len(resources) >= w.batchSize || sizeBytes >= int64(w.batchSizeBytes) { w.flushTable(ctx, tableName, resources) resources = make([]*message.Insert, 0) sizeBytes = 0 From b7ede751f2e6ec8ca4ea48f4b6314f2f9c20ad85 Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Mon, 26 Jun 2023 11:56:51 +0300 Subject: [PATCH 124/125] mod tidy --- go.mod | 12 ------------ go.sum | 35 ----------------------------------- 2 files changed, 47 deletions(-) diff --git a/go.mod b/go.mod index c8ef0b2b5f..f940e8ae0a 100644 --- a/go.mod +++ b/go.mod @@ -27,34 +27,22 @@ require ( replace github.com/apache/arrow/go/v13 => github.com/cloudquery/arrow/go/v13 v13.0.0-20230626001500-065602842c3a require ( - github.com/andybalholm/brotli v1.0.5 // indirect - github.com/apache/thrift v0.16.0 // indirect - github.com/avast/retry-go/v4 v4.3.4 // indirect github.com/davecgh/go-spew v1.1.1 // indirect github.com/ghodss/yaml v1.0.0 // indirect github.com/golang/protobuf v1.5.3 // indirect - github.com/golang/snappy v0.0.4 // indirect github.com/google/flatbuffers v23.1.21+incompatible // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect - github.com/klauspost/asmfmt v1.3.2 // indirect github.com/klauspost/compress v1.16.0 // indirect github.com/klauspost/cpuid/v2 v2.2.3 // indirect github.com/mattn/go-colorable v0.1.13 // indirect github.com/mattn/go-isatty v0.0.18 // indirect - github.com/mattn/go-runewidth v0.0.14 // indirect - github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 // indirect - github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3 // indirect - github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect github.com/pierrec/lz4/v4 v4.1.17 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect - github.com/rivo/uniseg v0.2.0 // indirect - github.com/schollz/progressbar/v3 v3.13.1 // indirect github.com/spf13/pflag v1.0.5 // indirect github.com/zeebo/xxh3 v1.0.2 // indirect golang.org/x/mod v0.8.0 // indirect golang.org/x/net v0.9.0 // indirect golang.org/x/sys v0.7.0 // indirect - golang.org/x/term v0.7.0 // indirect golang.org/x/tools v0.6.0 // indirect golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20230530153820-e85fd2cbaebc // indirect diff --git a/go.sum b/go.sum index 6f540cd92b..76f7b2023a 100644 --- a/go.sum +++ b/go.sum @@ -33,13 +33,6 @@ cloud.google.com/go/storage v1.10.0/go.mod h1:FLPqc6j+Ki4BU591ie1oL6qBQGu2Bl/tZ9 dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= -github.com/JohnCGriffin/overflow v0.0.0-20211019200055-46fa312c352c h1:RGWPOewvKIROun94nF7v2cua9qP+thov/7M50KEoeSU= -github.com/andybalholm/brotli v1.0.5 h1:8uQZIdzKmjc/iuPu7O2ioW48L81FgatrcpfFmiq/cCs= -github.com/andybalholm/brotli v1.0.5/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig= -github.com/apache/thrift v0.16.0 h1:qEy6UW60iVOlUy+b9ZR0d5WzUWYGOo4HfopoyBaNmoY= -github.com/apache/thrift v0.16.0/go.mod h1:PHK3hniurgQaNMZYaCLEqXKsYK8upmhPbmdP2FXSqgU= -github.com/avast/retry-go/v4 v4.3.4 h1:pHLkL7jvCvP317I8Ge+Km2Yhntv3SdkJm7uekkqbKhM= -github.com/avast/retry-go/v4 v4.3.4/go.mod h1:rv+Nla6Vk3/ilU0H51VHddWHiwimzX66yZ0JT6T+UvE= github.com/bradleyjkemp/cupaloy/v2 v2.8.0 h1:any4BmKE+jGIaMpnU8YgH/I2LPiLBufr6oMMlVBbn9M= github.com/bradleyjkemp/cupaloy/v2 v2.8.0/go.mod h1:bm7JXdkRd4BHJk9HpwqAI8BoAY1lps46Enkdqw6aRX0= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= @@ -89,7 +82,6 @@ github.com/golang/mock v1.4.0/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt github.com/golang/mock v1.4.1/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw= github.com/golang/mock v1.4.3/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw= github.com/golang/mock v1.4.4/go.mod h1:l3mdAwkq5BuhzHwde/uurv3sEJeZMXNpwsxVWU71h+4= -github.com/golang/mock v1.5.0/go.mod h1:CWnOUgYIOo4TcNZ0wHX3YZCqsaM1I1Jvs6v3mP3KVu8= github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= @@ -107,8 +99,6 @@ github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaS github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg= github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= -github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM= -github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/flatbuffers v23.1.21+incompatible h1:bUqzx/MXCDxuS0hRJL2EfjyZL3uQrPbMocUa8zGqsTA= @@ -151,11 +141,8 @@ github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2 github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU= github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk= -github.com/k0kubun/go-ansi v0.0.0-20180517002512-3bf9e2903213/go.mod h1:vNUNkEQ1e29fT/6vq2aBdFsgNPmy8qMdSay1npru+Sw= github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= -github.com/klauspost/asmfmt v1.3.2 h1:4Ri7ox3EwapiOjCki+hw14RyKk201CN4rzyCJRFLpK4= -github.com/klauspost/asmfmt v1.3.2/go.mod h1:AG8TuvYojzulgDAMCnYn50l/5QV3Bs/tp6j0HLHbNSE= github.com/klauspost/compress v1.16.0 h1:iULayQNOReoYUe+1qtKOqw9CwJv3aNQu8ivo7lw1HU4= github.com/klauspost/compress v1.16.0/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE= github.com/klauspost/cpuid/v2 v2.2.3 h1:sxCkb+qR91z4vsqw4vGGZlDgPz3G7gjaLyK3V8y70BU= @@ -170,17 +157,8 @@ github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxec github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94= github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= -github.com/mattn/go-isatty v0.0.17/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= github.com/mattn/go-isatty v0.0.18 h1:DOKFKCQ7FNG2L1rbrmstDN4QVRdS89Nkh85u68Uwp98= github.com/mattn/go-isatty v0.0.18/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= -github.com/mattn/go-runewidth v0.0.14 h1:+xnbZSEeDbOIg5/mE6JF0w6n9duR1l3/WmbinWVwUuU= -github.com/mattn/go-runewidth v0.0.14/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= -github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 h1:AMFGa4R4MiIpspGNG7Z948v4n35fFGB3RR3G/ry4FWs= -github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8/go.mod h1:mC1jAcsrzbxHt8iiaC+zU4b1ylILSosueou12R++wfY= -github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3 h1:+n/aFZefKZp7spd8DFdX7uMikMLXX4oubIzJF4kv/wI= -github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3/go.mod h1:RagcQ7I8IeTMnF8JTXieKnO4Z6JCsikNEzj0DwauVzE= -github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db h1:62I3jR2EmQ4l5rM/4FEfDWcRD+abF5XlKShorW5LRoQ= -github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db/go.mod h1:l0dey0ia/Uv7NcFFVbCLtqEBQbrT4OCwCSKTEv6enCw= github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o= github.com/pierrec/lz4/v4 v4.1.17 h1:kV4Ip+/hUBC+8T6+2EgburRtkE9ef4nbY3f4dFhGjMc= github.com/pierrec/lz4/v4 v4.1.17/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= @@ -191,8 +169,6 @@ github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINE github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= -github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY= -github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= github.com/rs/xid v1.2.1/go.mod h1:+uKXf+4Djp6Md1KODXJxgGQPKngRmWyn10oCKFzNHOQ= github.com/rs/xid v1.4.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg= @@ -200,24 +176,16 @@ github.com/rs/zerolog v1.19.0/go.mod h1:IzD0RJ65iWH0w97OQQebJEvTZYvsCUm9WVLWBQrJ github.com/rs/zerolog v1.29.1 h1:cO+d60CHkknCbvzEWxP0S9K6KqyTjrCNUy1LdQLCGPc= github.com/rs/zerolog v1.29.1/go.mod h1:Le6ESbR7hc+DP6Lt1THiV8CQSdkkNrd3R0XbEgp3ZBU= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= -github.com/schollz/progressbar/v3 v3.13.1 h1:o8rySDYiQ59Mwzy2FELeHY5ZARXZTVJC7iHD6PEFUiE= -github.com/schollz/progressbar/v3 v3.13.1/go.mod h1:xvrbki8kfT1fzWzBT/UZd9L6GA+jdL7HAgq2RFnO6fQ= github.com/spf13/cobra v1.6.1 h1:o94oiPyS4KD1mPy2fmcYYHHfCxLqYjJOhGsCHFZtEzA= github.com/spf13/cobra v1.6.1/go.mod h1:IOw/AERYS7UzyrGinqmz6HLUo219MORXGxhbaJUqzrY= github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= -github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= -github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= -github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/thoas/go-funk v0.9.3 h1:7+nAEx3kn5ZJcnDm2Bh23N2yOtweO14bi//dvRtgLpw= @@ -352,9 +320,6 @@ golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.7.0 h1:3jlCCIQZPdOYu1h8BkNvLz8Kgwtae2cagcG/VamtZRU= golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= -golang.org/x/term v0.6.0/go.mod h1:m6U89DPEgQRMq3DNkDClhWw02AUbt2daBVO4cn4Hv9U= -golang.org/x/term v0.7.0 h1:BEvjmm5fURWqcfbSKTdpkDXYBrUS1c0m8agp14W48vQ= -golang.org/x/term v0.7.0/go.mod h1:P32HKFT3hSsZrRxla30E9HqToFYAQPCMs/zFMBUFqPY= golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= From 440245354f2f7e0d127389cec117d9213d31b55e Mon Sep 17 00:00:00 2001 From: Erez Rokah Date: Mon, 26 Jun 2023 12:01:53 +0300 Subject: [PATCH 125/125] Update lint_markdown.yml --- .github/workflows/lint_markdown.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/lint_markdown.yml b/.github/workflows/lint_markdown.yml index cecd2c8bdc..aabcd9530d 100644 --- a/.github/workflows/lint_markdown.yml +++ b/.github/workflows/lint_markdown.yml @@ -16,7 +16,7 @@ jobs: - name: Vale uses: errata-ai/vale-action@v2 with: - vale_flags: "--glob=!{plugins/source/testdata/*,CHANGELOG.md,.github/styles/proselint/README.md}" + vale_flags: "--glob=!{docs/testdata/*,CHANGELOG.md,.github/styles/proselint/README.md}" filter_mode: nofilter env: GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} @@ -31,4 +31,4 @@ jobs: with: files: . config_file: .markdownlint.yaml - ignore_files: "{plugins/source/testdata/*,CHANGELOG.md}" + ignore_files: "{docs/testdata/*,CHANGELOG.md}"