Skip to content

Commit

Permalink
feat(aws)!: Migrate to arrow native SDK (#10797)
Browse files Browse the repository at this point in the history
BEGIN_COMMIT_OVERRIDE
feat: Update to use [Apache Arrow](https://arrow.apache.org/) type
system

BREAKING-CHANGE: This release introduces an internal change to our type
system to use [Apache Arrow](https://arrow.apache.org/). This should not
have any visible breaking changes, however due to the size of the change
we are introducing it under a major version bump to communicate that it
might have some bugs that we weren't able to catch during our internal
tests. If you encounter an issue during the upgrade, please submit a
[bug
report](https://github.com/cloudquery/cloudquery/issues/new/choose).

You will also need to update destinations depending on which one you
use:
Azure Blob Storage >= v3.2.0
BigQuery >= v3.0.0
ClickHouse >= v3.1.1
DuckDB >= v1.1.6
Elasticsearch >= v2.0.0
File >= v3.2.0
Firehose >= v2.0.2
GCS >= v3.2.0
Gremlin >= v2.1.10
Kafka >= v3.0.1
Meilisearch >= v2.0.1
MSSQL >= v4.2.0
MongoDB >= v2.0.1
MySQL >= v2.0.2
Neo4j >= v3.0.0
PostgreSQL >= v4.2.0
S3 >= v4.4.0
Snowflake >= v2.1.1
SQLite >= v2.2.0


END_COMMIT_OVERRIDE

---------

Co-authored-by: Herman Schaaf <hermanschaaf@gmail.com>
Co-authored-by: bbernays <ben@cloudquery.io>
  • Loading branch information
3 people committed May 25, 2023
1 parent 8fbcbae commit e355d14
Show file tree
Hide file tree
Showing 1,631 changed files with 15,659 additions and 15,580 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/source_aws.yml
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ jobs:
- name: Setup CloudQuery
uses: cloudquery/setup-cloudquery@v3
with:
version: 'v2.5.3'
version: 'v3.5.0'
- name: Migrate DB
run: cloudquery migrate test/policy_cq_config.yml
env:
Expand Down
6 changes: 3 additions & 3 deletions plugins/source/aws/client/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@ import (
wafv2types "github.com/aws/aws-sdk-go-v2/service/wafv2/types"
"github.com/aws/smithy-go/logging"
"github.com/cloudquery/plugin-pb-go/specs"
"github.com/cloudquery/plugin-sdk/v2/backend"
"github.com/cloudquery/plugin-sdk/v2/plugins/source"
"github.com/cloudquery/plugin-sdk/v2/schema"
"github.com/cloudquery/plugin-sdk/v3/backend"
"github.com/cloudquery/plugin-sdk/v3/plugins/source"
"github.com/cloudquery/plugin-sdk/v3/schema"
"github.com/rs/zerolog"
"github.com/thoas/go-funk"
"golang.org/x/sync/errgroup"
Expand Down
33 changes: 14 additions & 19 deletions plugins/source/aws/client/columns.go
Original file line number Diff line number Diff line change
@@ -1,38 +1,33 @@
package client

import (
"github.com/cloudquery/plugin-sdk/v2/schema"
"github.com/apache/arrow/go/v13/arrow"
"github.com/cloudquery/plugin-sdk/v3/schema"
)

func DefaultAccountIDColumn(pk bool) schema.Column {
return schema.Column{
Name: "account_id",
Type: schema.TypeString,
Resolver: ResolveAWSAccount,
CreationOptions: schema.ColumnCreationOptions{
PrimaryKey: pk,
},
Name: "account_id",
Type: arrow.BinaryTypes.String,
Resolver: ResolveAWSAccount,
PrimaryKey: pk,
}
}

func DefaultRegionColumn(pk bool) schema.Column {
return schema.Column{
Name: "region",
Type: schema.TypeString,
Resolver: ResolveAWSRegion,
CreationOptions: schema.ColumnCreationOptions{
PrimaryKey: pk,
},
Name: "region",
Type: arrow.BinaryTypes.String,
Resolver: ResolveAWSRegion,
PrimaryKey: pk,
}
}

func LanguageCodeColumn(pk bool) schema.Column {
return schema.Column{
Name: "language_code",
Type: schema.TypeString,
Resolver: ResolveLanguageCode,
CreationOptions: schema.ColumnCreationOptions{
PrimaryKey: pk,
},
Name: "language_code",
Type: arrow.BinaryTypes.String,
Resolver: ResolveLanguageCode,
PrimaryKey: pk,
}
}
4 changes: 2 additions & 2 deletions plugins/source/aws/client/helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ import (

"github.com/aws/aws-sdk-go-v2/aws/arn"
"github.com/aws/smithy-go"
"github.com/cloudquery/plugin-sdk/v2/schema"
"github.com/cloudquery/plugin-sdk/v2/transformers"
"github.com/cloudquery/plugin-sdk/v3/schema"
"github.com/cloudquery/plugin-sdk/v3/transformers"
)

type AWSService string
Expand Down
16 changes: 9 additions & 7 deletions plugins/source/aws/client/helpers_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,12 @@ import (
"errors"
"testing"

"github.com/apache/arrow/go/v13/arrow"
"github.com/aws/aws-sdk-go-v2/aws"
ttypes "github.com/aws/aws-sdk-go-v2/service/acm/types"
"github.com/aws/aws-sdk-go-v2/service/apigateway/types"
"github.com/cloudquery/plugin-sdk/v2/schema"
"github.com/cloudquery/plugin-sdk/v3/scalar"
"github.com/cloudquery/plugin-sdk/v3/schema"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
Expand All @@ -31,8 +33,8 @@ func TestResolveARN(t *testing.T) {
func(resource *schema.Resource) ([]string, error) {
return []string{"restapis", *resource.Item.(types.RestApi).Id}, nil
},
schema.NewResourceData(&schema.Table{Columns: []schema.Column{{Name: "myarn", Type: schema.TypeString}}}, nil, types.RestApi{Id: aws.String("myid")}),
&schema.Text{Status: schema.Present, Str: "arn:aws:apigateway:region::restapis/myid"},
schema.NewResourceData(&schema.Table{Columns: []schema.Column{{Name: "myarn", Type: arrow.BinaryTypes.String}}}, nil, types.RestApi{Id: aws.String("myid")}),
&scalar.String{Valid: true, Value: "arn:aws:apigateway:region::restapis/myid"},
false,
},
{
Expand All @@ -42,8 +44,8 @@ func TestResolveARN(t *testing.T) {
func(resource *schema.Resource) ([]string, error) {
return []string{"", "restapis", *resource.Item.(types.RestApi).Id}, nil
},
schema.NewResourceData(&schema.Table{Columns: []schema.Column{{Name: "myarn", Type: schema.TypeString}}}, nil, types.RestApi{Id: aws.String("myid")}),
&schema.Text{Status: schema.Present, Str: "arn:aws:apigateway:region::/restapis/myid"},
schema.NewResourceData(&schema.Table{Columns: []schema.Column{{Name: "myarn", Type: arrow.BinaryTypes.String}}}, nil, types.RestApi{Id: aws.String("myid")}),
&scalar.String{Valid: true, Value: "arn:aws:apigateway:region::/restapis/myid"},
false,
},
{
Expand All @@ -53,8 +55,8 @@ func TestResolveARN(t *testing.T) {
func(resource *schema.Resource) ([]string, error) {
return nil, errors.New("test")
},
schema.NewResourceData(&schema.Table{Columns: []schema.Column{{Name: "myarn", Type: schema.TypeString}}}, nil, types.RestApi{Id: aws.String("myid")}),
&schema.Text{Status: schema.Undefined},
schema.NewResourceData(&schema.Table{Columns: []schema.Column{{Name: "myarn", Type: arrow.BinaryTypes.String}}}, nil, types.RestApi{Id: aws.String("myid")}),
&scalar.String{},
true,
},
}
Expand Down
2 changes: 1 addition & 1 deletion plugins/source/aws/client/multiplexers.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import (
"sort"

wafv2types "github.com/aws/aws-sdk-go-v2/service/wafv2/types"
"github.com/cloudquery/plugin-sdk/v2/schema"
"github.com/cloudquery/plugin-sdk/v3/schema"
)

var AllNamespaces = []string{ // this is only used in applicationautoscaling
Expand Down
2 changes: 1 addition & 1 deletion plugins/source/aws/client/resolvers.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import (
"fmt"
"reflect"

"github.com/cloudquery/plugin-sdk/v2/schema"
"github.com/cloudquery/plugin-sdk/v3/schema"
"github.com/mitchellh/hashstructure/v2"
)

Expand Down
9 changes: 6 additions & 3 deletions plugins/source/aws/client/resolvers_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,13 @@ import (
"context"
"testing"

"github.com/cloudquery/plugin-sdk/v3/scalar"
sdkTypes "github.com/cloudquery/plugin-sdk/v3/types"

"github.com/aws/aws-sdk-go-v2/aws"
types1 "github.com/aws/aws-sdk-go-v2/service/codepipeline/types"
types2 "github.com/aws/aws-sdk-go-v2/service/redshift/types"
"github.com/cloudquery/plugin-sdk/v2/schema"
"github.com/cloudquery/plugin-sdk/v3/schema"
"github.com/stretchr/testify/assert"
)

Expand Down Expand Up @@ -51,14 +54,14 @@ func TestResolveTags(t *testing.T) {
Columns: []schema.Column{
{
Name: "tags",
Type: schema.TypeJSON,
Type: sdkTypes.ExtensionTypes.JSON,
},
},
}
r := schema.NewResourceData(ta, nil, tc.InputItem)
err := ResolveTags(context.Background(), nil, r, ta.Columns[0])
assert.NoError(t, err)
expectedJson := &schema.JSON{}
expectedJson := &scalar.JSON{}
_ = expectedJson.Set(tc.ExpectedTags)
assert.Equal(t, expectedJson, r.Get(ta.Columns[0].Name))
}
Expand Down
2 changes: 1 addition & 1 deletion plugins/source/aws/client/tableoptions/cloudtrail_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ package tableoptions
import (
"testing"

"github.com/cloudquery/plugin-sdk/v2/faker"
"github.com/cloudquery/plugin-sdk/v3/faker"
"github.com/stretchr/testify/assert"
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ import (
"github.com/aws/aws-sdk-go-v2/service/inspector2"

inspector2types "github.com/aws/aws-sdk-go-v2/service/inspector2/types"
"github.com/cloudquery/plugin-sdk/v2/faker"
"github.com/cloudquery/plugin-sdk/v3/caser"
"github.com/cloudquery/plugin-sdk/v3/faker"
"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts"
)
Expand Down
16 changes: 10 additions & 6 deletions plugins/source/aws/client/testing.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,18 @@ package client

import (
"context"
"encoding/json"
"fmt"
"os"
"testing"
"time"

"github.com/apache/arrow/go/v13/arrow"
"github.com/cloudquery/plugin-pb-go/specs"
"github.com/cloudquery/plugin-sdk/v2/plugins/source"
"github.com/cloudquery/plugin-sdk/v2/schema"
"github.com/cloudquery/plugin-sdk/v3/plugins/source"
"github.com/cloudquery/plugin-sdk/v3/scalar"
"github.com/cloudquery/plugin-sdk/v3/schema"
"github.com/cloudquery/plugin-sdk/v3/types"
"github.com/golang/mock/gomock"
"github.com/rs/zerolog"
)
Expand Down Expand Up @@ -73,17 +77,17 @@ func validateTagStructure(t *testing.T, plugin *source.Plugin, resources []*sche
if column.Name != "tags" {
continue
}
if column.Type != schema.TypeJSON {
if !arrow.TypeEqual(column.Type, types.ExtensionTypes.JSON) {
t.Fatalf("tags column in %s should be of type JSON", table.Name)
}
for _, resource := range resources {
if resource.Table.Name != table.Name {
continue
}
value := resource.Get(column.Name)
val, ok := value.Get().(map[string]any)
if !ok {
t.Fatalf("unexpected type for tags column: got %v, want type map[string]any", val)
var tags map[string]any
if err := json.Unmarshal(value.(*scalar.JSON).Value, &tags); err != nil {
t.Fatalf("failed to unmarshal tags column %s: %v", value.(*scalar.JSON).Value, err)
}
}
}
Expand Down
15 changes: 8 additions & 7 deletions plugins/source/aws/client/transformers.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,26 +6,27 @@ import (
"strings"
"time"

"github.com/cloudquery/plugin-sdk/v2/schema"
"github.com/cloudquery/plugin-sdk/v2/transformers"
"github.com/apache/arrow/go/v13/arrow"
"github.com/cloudquery/plugin-sdk/v3/schema"
"github.com/cloudquery/plugin-sdk/v3/transformers"
"github.com/thoas/go-funk"
)

func TimestampTypeTransformer(field reflect.StructField) (schema.ValueType, error) {
func TimestampTypeTransformer(field reflect.StructField) (arrow.DataType, error) {
if !strings.HasSuffix(field.Name, "At") {
return schema.TypeInvalid, nil // fallback
return nil, nil // fallback
}

switch field.Type {
case reflect.TypeOf(""), reflect.TypeOf(new(string)):
return schema.TypeTimestamp, nil
return arrow.FixedWidthTypes.Timestamp_us, nil
default:
return schema.TypeInvalid, nil // fallback
return nil, nil // fallback
}
}

func TimestampResolverTransformer(field reflect.StructField, path string) schema.ColumnResolver {
if t, _ := TimestampTypeTransformer(field); t != schema.TypeTimestamp {
if t, _ := TimestampTypeTransformer(field); t != arrow.FixedWidthTypes.Timestamp_us {
return transformers.DefaultResolverTransformer(field, path)
}

Expand Down
7 changes: 4 additions & 3 deletions plugins/source/aws/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ module github.com/cloudquery/cloudquery/plugins/source/aws
go 1.19

require (
github.com/apache/arrow/go/v13 v13.0.0-20230509040948-de6c3cd2b604
github.com/aws/aws-sdk-go-v2 v1.18.0
github.com/aws/aws-sdk-go-v2/config v1.18.25
github.com/aws/aws-sdk-go-v2/credentials v1.13.24
Expand Down Expand Up @@ -117,8 +118,7 @@ require (
github.com/basgys/goxml2json v1.1.0
github.com/cloudquery/codegen v0.2.1
github.com/cloudquery/plugin-pb-go v1.0.8
github.com/cloudquery/plugin-sdk/v2 v2.7.0
github.com/cloudquery/plugin-sdk/v3 v3.0.1
github.com/cloudquery/plugin-sdk/v3 v3.6.4
github.com/gocarina/gocsv v0.0.0-20230325173030-9a18a846a479
github.com/golang/mock v1.6.0
github.com/google/go-cmp v0.5.9
Expand All @@ -137,7 +137,6 @@ replace github.com/apache/arrow/go/v13 => github.com/cloudquery/arrow/go/v13 v13

require (
github.com/andybalholm/brotli v1.0.5 // indirect
github.com/apache/arrow/go/v13 v13.0.0-20230509040948-de6c3cd2b604 // indirect
github.com/apache/thrift v0.16.0 // indirect
github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.4.10 // indirect
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.13.3 // indirect
Expand All @@ -154,6 +153,7 @@ require (
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.14.10 // indirect
github.com/bitly/go-simplejson v0.5.0 // indirect
github.com/cloudquery/plugin-sdk v1.24.0
github.com/cloudquery/plugin-sdk/v2 v2.7.0 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/getsentry/sentry-go v0.20.0 // indirect
github.com/ghodss/yaml v1.0.0
Expand All @@ -173,6 +173,7 @@ require (
github.com/mattn/go-isatty v0.0.18 // indirect
github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 // indirect
github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3 // indirect
github.com/pierrec/lz4/v4 v4.1.15 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/spf13/cast v1.5.0 // indirect
github.com/spf13/cobra v1.6.1 // indirect
Expand Down
5 changes: 3 additions & 2 deletions plugins/source/aws/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -331,8 +331,8 @@ github.com/cloudquery/plugin-sdk v1.24.0 h1:vgi3RImpSlnhacg/zbrpsbUEsYpG51UB5KVk
github.com/cloudquery/plugin-sdk v1.24.0/go.mod h1:teMPyCON3uPdMsHvzpSiOg+IK2sOR5Tf9dYLreoURzI=
github.com/cloudquery/plugin-sdk/v2 v2.7.0 h1:hRXsdEiaOxJtsn/wZMFQC9/jPfU1MeMK3KF+gPGqm7U=
github.com/cloudquery/plugin-sdk/v2 v2.7.0/go.mod h1:pAX6ojIW99b/Vg4CkhnsGkRIzNaVEceYMR+Bdit73ug=
github.com/cloudquery/plugin-sdk/v3 v3.0.1 h1:5l3dG4AIrAWadc0aEiht5au2gM/wHLRSK2qSzao1Sm0=
github.com/cloudquery/plugin-sdk/v3 v3.0.1/go.mod h1:cJP020H448wknQfjCDo0HR0b3vt9kYcjrEWtmV3YIgc=
github.com/cloudquery/plugin-sdk/v3 v3.6.4 h1:P4OkS5tJYkv3OqeL60DAVqXXbFQUyPKJ5YDtAgjl9b4=
github.com/cloudquery/plugin-sdk/v3 v3.6.4/go.mod h1:3JrZXEULmGXpkOukVaRIzaA63d7TJr9Ukp6hemTjbtc=
github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc=
github.com/cncf/udpa/go v0.0.0-20201120205902-5459f2c99403/go.mod h1:WmhPx2Nbnhtbo57+VJT5O0JRkEi1Wbu0z5j0R8u5Hbk=
github.com/coreos/go-systemd v0.0.0-20190321100706-95778dfbb74e/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4=
Expand Down Expand Up @@ -471,6 +471,7 @@ github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyua
github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo=
github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o=
github.com/pierrec/lz4/v4 v4.1.15 h1:MO0/ucJhngq7299dKLwIMtgTfbkoSPF6AoMYDd8Q4q0=
github.com/pierrec/lz4/v4 v4.1.15/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4=
github.com/pingcap/errors v0.11.4 h1:lFuQV/oaUMGcD2tqt+01ROSmJs75VG1ToEOkZIZ4nE4=
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
Expand Down
2 changes: 1 addition & 1 deletion plugins/source/aws/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ package main

import (
"github.com/cloudquery/cloudquery/plugins/source/aws/resources/plugin"
"github.com/cloudquery/plugin-sdk/v2/serve"
"github.com/cloudquery/plugin-sdk/v3/serve"
)

const sentryDSN = "https://6c6b72bc946844cb8471f49eba485cde@o1396617.ingest.sentry.io/6747636"
Expand Down
6 changes: 3 additions & 3 deletions plugins/source/aws/resources/plugin/plugin.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@ import (
"strings"

"github.com/cloudquery/cloudquery/plugins/source/aws/client"
"github.com/cloudquery/plugin-sdk/v2/caser"
"github.com/cloudquery/plugin-sdk/v2/plugins/source"
"github.com/cloudquery/plugin-sdk/v2/schema"
"github.com/cloudquery/plugin-sdk/v3/caser"
"github.com/cloudquery/plugin-sdk/v3/plugins/source"
"github.com/cloudquery/plugin-sdk/v3/schema"
)

var (
Expand Down
2 changes: 1 addition & 1 deletion plugins/source/aws/resources/plugin/tables.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ import (
"github.com/cloudquery/cloudquery/plugins/source/aws/resources/services/wafv2"
"github.com/cloudquery/cloudquery/plugins/source/aws/resources/services/workspaces"
"github.com/cloudquery/cloudquery/plugins/source/aws/resources/services/xray"
"github.com/cloudquery/plugin-sdk/v2/schema"
"github.com/cloudquery/plugin-sdk/v3/schema"
)

func tables() []*schema.Table {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,12 @@ package accessanalyzer
import (
"context"

"github.com/apache/arrow/go/v13/arrow"
"github.com/aws/aws-sdk-go-v2/service/accessanalyzer"
"github.com/aws/aws-sdk-go-v2/service/accessanalyzer/types"
"github.com/cloudquery/cloudquery/plugins/source/aws/client"
"github.com/cloudquery/plugin-sdk/v2/schema"
"github.com/cloudquery/plugin-sdk/v2/transformers"
"github.com/cloudquery/plugin-sdk/v3/schema"
"github.com/cloudquery/plugin-sdk/v3/transformers"
)

func analyzerArchiveRules() *schema.Table {
Expand All @@ -21,7 +22,7 @@ func analyzerArchiveRules() *schema.Table {
client.DefaultRegionColumn(false),
{
Name: "analyzer_arn",
Type: schema.TypeString,
Type: arrow.BinaryTypes.String,
Resolver: schema.ParentColumnResolver("arn"),
},
},
Expand Down

0 comments on commit e355d14

Please sign in to comment.