Skip to content
This repository has been archived by the owner on Aug 16, 2022. It is now read-only.

Commit

Permalink
feat: Kinesis Firehose Support (#1359)
Browse files Browse the repository at this point in the history

#### Summary

<!--
Explain what problem this PR addresses
-->

---
  • Loading branch information
bbernays committed Aug 12, 2022
1 parent 1f1ae96 commit 4324f6b
Show file tree
Hide file tree
Showing 17 changed files with 2,434 additions and 8 deletions.
3 changes: 3 additions & 0 deletions client/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ import (
"github.com/aws/aws-sdk-go-v2/service/elasticsearchservice"
"github.com/aws/aws-sdk-go-v2/service/emr"
"github.com/aws/aws-sdk-go-v2/service/eventbridge"
"github.com/aws/aws-sdk-go-v2/service/firehose"
"github.com/aws/aws-sdk-go-v2/service/fsx"
"github.com/aws/aws-sdk-go-v2/service/glue"
"github.com/aws/aws-sdk-go-v2/service/guardduty"
Expand Down Expand Up @@ -159,6 +160,7 @@ type Services struct {
ELBv2 ElbV2Client
EMR EmrClient
EventBridge EventBridgeClient
Firehose FirehoseClient
FSX FsxClient
Glue GlueClient
GuardDuty GuardDutyClient
Expand Down Expand Up @@ -663,6 +665,7 @@ func initServices(region string, c aws.Config) Services {
ELBv2: elbv2.NewFromConfig(awsCfg),
EMR: emr.NewFromConfig(awsCfg),
EventBridge: eventbridge.NewFromConfig(awsCfg),
Firehose: firehose.NewFromConfig(awsCfg),
FSX: fsx.NewFromConfig(awsCfg),
Glue: glue.NewFromConfig(awsCfg),
GuardDuty: guardduty.NewFromConfig(awsCfg),
Expand Down
96 changes: 96 additions & 0 deletions client/mocks/firehose.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions client/services.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ import (
"github.com/aws/aws-sdk-go-v2/service/elasticsearchservice"
"github.com/aws/aws-sdk-go-v2/service/emr"
"github.com/aws/aws-sdk-go-v2/service/eventbridge"
"github.com/aws/aws-sdk-go-v2/service/firehose"
"github.com/aws/aws-sdk-go-v2/service/fsx"
"github.com/aws/aws-sdk-go-v2/service/glue"
"github.com/aws/aws-sdk-go-v2/service/guardduty"
Expand Down Expand Up @@ -416,6 +417,13 @@ type EventBridgeClient interface {
ListTagsForResource(ctx context.Context, params *eventbridge.ListTagsForResourceInput, optFns ...func(*eventbridge.Options)) (*eventbridge.ListTagsForResourceOutput, error)
}

// FirehoseClient declares the Firehose client methods that are reachable
// through the Firehose field of Services. initServices fills that field with
// the value returned by firehose.NewFromConfig, so the SDK client is expected
// to satisfy this interface. The go:generate directive below asks mockgen to
// write a mock of the interface into ./mocks/firehose.go (package mocks).
//
//go:generate mockgen -package=mocks -destination=./mocks/firehose.go . FirehoseClient
type FirehoseClient interface {
DescribeDeliveryStream(ctx context.Context, params *firehose.DescribeDeliveryStreamInput, optFns ...func(*firehose.Options)) (*firehose.DescribeDeliveryStreamOutput, error)
ListDeliveryStreams(ctx context.Context, params *firehose.ListDeliveryStreamsInput, optFns ...func(*firehose.Options)) (*firehose.ListDeliveryStreamsOutput, error)
ListTagsForDeliveryStream(ctx context.Context, params *firehose.ListTagsForDeliveryStreamInput, optFns ...func(*firehose.Options)) (*firehose.ListTagsForDeliveryStreamOutput, error)
}

//go:generate mockgen -package=mocks -destination=./mocks/mock_fsx.go . FsxClient
type FsxClient interface {
DescribeBackups(ctx context.Context, params *fsx.DescribeBackupsInput, optFns ...func(*fsx.Options)) (*fsx.DescribeBackupsOutput, error)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@

# Table: aws_firehose_delivery_stream_elasticsearch_destination
The destination description in Amazon ES
## Columns
| Name | Type | Description |
| ------------- | ------------- | ----- |
|delivery_stream_cq_id|uuid|Unique CloudQuery ID of aws_firehose_delivery_streams table (FK)|
|processing_configuration_processors|jsonb|Describes a data processing configuration|
|buffering_hints_interval_in_seconds|bigint|Buffer incoming data for the specified period of time, in seconds, before delivering it to the destination|
|buffering_hints_size_in_mb_s|bigint|Buffer incoming data to the specified size, in MBs, before delivering it to the destination|
|cloud_watch_logging_options_enabled|boolean|Enables or disables CloudWatch logging|
|cloud_watch_logging_options_log_group_name|text|The CloudWatch group name for logging|
|cloud_watch_logging_options_log_stream_name|text|The CloudWatch log stream name for logging|
|cluster_endpoint|text|The endpoint to use when communicating with the cluster|
|domain_arn|text|The ARN of the Amazon ES domain|
|index_name|text|The Elasticsearch index name|
|index_rotation_period|text|The Elasticsearch index rotation period|
|processing_configuration_enabled|boolean|Enables or disables data processing|
|retry_options_duration_in_seconds|bigint|After an initial failure to deliver to Amazon ES, the total amount of time during which Kinesis Data Firehose retries delivery (including the first attempt)|
|role_arn|text|The Amazon Resource Name (ARN) of the AWS credentials|
|s3_backup_mode|text|The Amazon S3 backup mode|
|s3_destination_bucket_arn|text|The ARN of the S3 bucket|
|s3_destination_buffering_hints_interval_in_seconds|bigint|Buffer incoming data for the specified period of time, in seconds, before delivering it to the destination|
|s3_destination_buffering_hints_size_in_mb_s|bigint|Buffer incoming data to the specified size, in MiBs, before delivering it to the destination|
|s3_destination_compression_format|text|The compression format|
|s3_destination_kms_encryption_config_aws_kms_key_arn|text|The Amazon Resource Name (ARN) of the encryption key|
|s3_destination_no_encryption_config|text|Specifically override existing encryption information to ensure that no encryption is used|
|s3_destination_role_arn|text|The Amazon Resource Name (ARN) of the AWS credentials|
|s3_destination_cloud_watch_logging_options_enabled|boolean|Enables or disables CloudWatch logging|
|s3_destination_cloud_watch_logging_options_log_group_name|text|The CloudWatch group name for logging|
|s3_destination_cloud_watch_logging_options_log_stream_name|text|The CloudWatch log stream name for logging|
|s3_destination_error_output_prefix|text|A prefix that Kinesis Data Firehose evaluates and adds to failed records before writing them to S3|
|s3_destination_prefix|text|The "YYYY/MM/DD/HH" time format prefix is automatically used for delivered Amazon S3 files|
|type_name|text|The Elasticsearch type name|
|vpc_configuration_description_role_arn|text|The ARN of the IAM role that the delivery stream uses to create endpoints in the destination VPC|
|vpc_configuration_description_security_group_ids|text[]|The IDs of the security groups that Kinesis Data Firehose uses when it creates ENIs in the VPC of the Amazon ES destination|
|vpc_configuration_description_subnet_ids|text[]|The IDs of the subnets that Kinesis Data Firehose uses to create ENIs in the VPC of the Amazon ES destination|
|vpc_configuration_description_vpc_id|text|The ID of the Amazon ES destination's VPC|
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@

# Table: aws_firehose_delivery_stream_extended_s3_destination
Describes a destination in Amazon S3
## Columns
| Name | Type | Description |
| ------------- | ------------- | ----- |
|delivery_stream_cq_id|uuid|Unique CloudQuery ID of aws_firehose_delivery_streams table (FK)|
|processing_configuration_processors|jsonb|Describes a data processing configuration|
|bucket_arn|text|The ARN of the S3 bucket|
|buffering_hints_interval_in_seconds|bigint|Buffer incoming data for the specified period of time, in seconds, before delivering it to the destination|
|buffering_hints_size_in_mb_s|bigint|Buffer incoming data to the specified size, in MiBs, before delivering it to the destination|
|compression_format|text|The compression format|
|encryption_configuration_kms_encryption_config_aws_kms_key_arn|text|The Amazon Resource Name (ARN) of the encryption key|
|encryption_configuration_no_encryption_config|text|Specifically override existing encryption information to ensure that no encryption is used|
|role_arn|text|The Amazon Resource Name (ARN) of the AWS credentials|
|cloud_watch_logging_options_enabled|boolean|Enables or disables CloudWatch logging|
|cloud_watch_logging_options_log_group_name|text|The CloudWatch group name for logging|
|cloud_watch_logging_options_log_stream_name|text|The CloudWatch log stream name for logging|
|enabled|boolean|Whether data format conversion is enabled. Defaults to true|
|deserializer_hive_json_ser_de_timestamp_formats|text[]|Indicates how you want Kinesis Data Firehose to parse the date and timestamps that may be present in your input data JSON|
|deserializer_open_x_json_ser_de_case_insensitive|boolean|When set to true, which is the default, Kinesis Data Firehose converts JSON keys to lowercase before deserializing them|
|deserializer_open_x_json_ser_de_column_to_json_key_mappings|jsonb|Maps column names to JSON keys that aren't identical to the column names|
|deserializer_open_x_json_ser_de_convert_dots_to_underscores|boolean|When set to true, specifies that the names of the keys include dots and that you want Kinesis Data Firehose to replace them with underscores|
|serializer_orc_ser_de_block_size_bytes|bigint|The Hadoop Distributed File System (HDFS) block size|
|serializer_orc_ser_de_bloom_filter_columns|text[]|The column names for which you want Kinesis Data Firehose to create bloom filters|
|serializer_orc_ser_de_bloom_filter_false_positive_probability|float|The Bloom filter false positive probability (FPP)|
|serializer_orc_ser_de_compression|text|The compression code to use over data blocks|
|serializer_orc_ser_de_dictionary_key_threshold|float|Represents the fraction of the total number of non-null rows|
|serializer_orc_ser_de_enable_padding|boolean|Set this to true to indicate that you want stripes to be padded to the HDFS block boundaries|
|serializer_orc_ser_de_format_version|text|The version of the file to write|
|serializer_orc_ser_de_padding_tolerance|float|A number between 0 and 1 that defines the tolerance for block padding as a decimal fraction of stripe size|
|serializer_orc_ser_de_row_index_stride|bigint|The number of rows between index entries|
|serializer_orc_ser_de_stripe_size_bytes|bigint|The number of bytes in each stripe|
|serializer_parquet_ser_de_block_size_bytes|bigint|The Hadoop Distributed File System (HDFS) block size|
|serializer_parquet_ser_de_compression|text|The compression code to use over data blocks|
|serializer_parquet_ser_de_enable_dictionary_compression|boolean|Indicates whether to enable dictionary compression|
|serializer_parquet_ser_de_max_padding_bytes|bigint|The maximum amount of padding to apply|
|serializer_parquet_ser_de_page_size_bytes|bigint|The Parquet page size|
|serializer_parquet_ser_de_writer_version|text|Indicates the version of row format to output|
|schema_configuration_catalog_id|text|The ID of the AWS Glue Data Catalog|
|schema_configuration_database_name|text|Specifies the name of the AWS Glue database that contains the schema for the output data|
|schema_configuration_region|text|If you don't specify an AWS Region, the default is the current Region|
|schema_configuration_role_arn|text|The role that Kinesis Data Firehose can use to access AWS Glue|
|schema_configuration_table_name|text|Specifies the AWS Glue table that contains the column information that constitutes your data schema|
|schema_configuration_version_id|text|Specifies the table version for the output data schema|
|dynamic_partitioning_enabled|boolean|Specifies that the dynamic partitioning is enabled for this Kinesis Data Firehose delivery stream|
|dynamic_partitioning_retry_options_duration_in_seconds|bigint|The period of time during which Kinesis Data Firehose retries to deliver data to the specified Amazon S3 prefix|
|error_output_prefix|text|A prefix that Kinesis Data Firehose evaluates and adds to failed records before writing them to S3|
|prefix|text|The "YYYY/MM/DD/HH" time format prefix is automatically used for delivered Amazon S3 files|
|processing_configuration_enabled|boolean|Enables or disables data processing|
|s3_backup_bucket_arn|text|The ARN of the S3 bucket|
|s3_backup_buffering_hints_interval_in_seconds|bigint|Buffer incoming data for the specified period of time, in seconds, before delivering it to the destination|
|s3_backup_buffering_hints_size_in_mb_s|bigint|Buffer incoming data to the specified size, in MiBs, before delivering it to the destination|
|s3_backup_compression_format|text|The compression format|
|s3_backup_kms_encryption_config_aws_kms_key_arn|text|The Amazon Resource Name (ARN) of the encryption key|
|s3_backup_no_encryption_config|text|Specifically override existing encryption information to ensure that no encryption is used|
|s3_backup_role_arn|text|The Amazon Resource Name (ARN) of the AWS credentials|
|s3_backup_cloud_watch_logging_options_enabled|boolean|Enables or disables CloudWatch logging|
|s3_backup_cloud_watch_logging_options_log_group_name|text|The CloudWatch group name for logging|
|s3_backup_cloud_watch_logging_options_log_stream_name|text|The CloudWatch log stream name for logging|
|s3_backup_error_output_prefix|text|A prefix that Kinesis Data Firehose evaluates and adds to failed records before writing them to S3|
|s3_backup_prefix|text|The "YYYY/MM/DD/HH" time format prefix is automatically used for delivered Amazon S3 files|
|s3_backup_mode|text|The Amazon S3 backup mode|
33 changes: 33 additions & 0 deletions docs/tables/aws_firehose_delivery_stream_http_destination.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@

# Table: aws_firehose_delivery_stream_http_destination
Describes the HTTP endpoint destination
## Columns
| Name | Type | Description |
| ------------- | ------------- | ----- |
|delivery_stream_cq_id|uuid|Unique CloudQuery ID of aws_firehose_delivery_streams table (FK)|
|processing_configuration_processors|jsonb|Describes a data processing configuration|
|buffering_hints_interval_in_seconds|bigint|Buffer incoming data for the specified period of time, in seconds, before delivering it to the destination|
|buffering_hints_size_in_mb_s|bigint|Buffer incoming data to the specified size, in MBs, before delivering it to the destination|
|cloud_watch_logging_options_enabled|boolean|Enables or disables CloudWatch logging|
|cloud_watch_logging_options_log_group_name|text|The CloudWatch group name for logging|
|cloud_watch_logging_options_log_stream_name|text|The CloudWatch log stream name for logging|
|endpoint_configuration_name|text|The name of the HTTP endpoint selected as the destination|
|endpoint_configuration_url|text|The URL of the HTTP endpoint selected as the destination|
|processing_configuration_enabled|boolean|Enables or disables data processing|
|request_configuration_common_attributes|jsonb|Describes the metadata sent to the HTTP endpoint destination|
|request_configuration_content_encoding|text|Kinesis Data Firehose uses the content encoding to compress the body of a request before sending the request to the destination|
|retry_options_duration_in_seconds|bigint|The total amount of time that Kinesis Data Firehose spends on retries|
|role_arn|text|Kinesis Data Firehose uses this IAM role for all the permissions that the delivery stream needs|
|s3_backup_mode|text|Describes the S3 bucket backup options for the data that Kinesis Firehose delivers to the HTTP endpoint destination|
|s3_destination_bucket_arn|text|The ARN of the S3 bucket|
|s3_destination_buffering_hints_interval_in_seconds|bigint|Buffer incoming data for the specified period of time, in seconds, before delivering it to the destination|
|s3_destination_buffering_hints_size_in_mb_s|bigint|Buffer incoming data to the specified size, in MiBs, before delivering it to the destination|
|s3_destination_compression_format|text|The compression format|
|s3_destination_kms_encryption_config_aws_kms_key_arn|text|The Amazon Resource Name (ARN) of the encryption key|
|s3_destination_no_encryption_config|text|Specifically override existing encryption information to ensure that no encryption is used|
|s3_destination_role_arn|text|The Amazon Resource Name (ARN) of the AWS credentials|
|s3_destination_cloud_watch_logging_options_enabled|boolean|Enables or disables CloudWatch logging|
|s3_destination_cloud_watch_logging_options_log_group_name|text|The CloudWatch group name for logging|
|s3_destination_cloud_watch_logging_options_log_stream_name|text|The CloudWatch log stream name for logging|
|s3_destination_error_output_prefix|text|A prefix that Kinesis Data Firehose evaluates and adds to failed records before writing them to S3|
|s3_destination_prefix|text|The "YYYY/MM/DD/HH" time format prefix is automatically used for delivered Amazon S3 files|

0 comments on commit 4324f6b

Please sign in to comment.