Skip to content

Commit

Permalink
[tf] add additional prefix 'parquet' to firehose and athena table fil…
Browse files Browse the repository at this point in the history
…e location
  • Loading branch information
Chunyong Lin committed Mar 6, 2020
1 parent d00e41c commit f48532a
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 8 deletions.
16 changes: 11 additions & 5 deletions terraform/modules/tf_globals/alerts_firehose/main.tf
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
locals {
stream_name = "${var.prefix}_streamalert_alert_delivery"
bucket_arn = "arn:aws:s3:::${var.prefix}-streamalerts"
alerts_location = "s3://${var.prefix}-streamalerts/alerts"
stream_name = "${var.prefix}_streamalert_alert_delivery"
bucket_arn = "arn:aws:s3:::${var.prefix}-streamalerts"

# Athena reads all data stored under 's3://bucketname/prefix/'. When the table's
# file format is Parquet, Athena throws "HIVE_CANNOT_OPEN_SPLIT" errors if any
# *.gz files are present under the same prefix.
# https://docs.aws.amazon.com/athena/latest/ug/tables-location-format.html
# So all Parquet-format data is saved to the S3 bucket under the "alerts/parquet" prefix.
alerts_location = "s3://${var.prefix}-streamalerts/alerts/parquet"
ser_de_params_key = var.file_format == "parquet" ? "serialization.format" : "ignore.malformed.json"
ser_de_params_value = var.file_format == "parquet" ? "1" : "true"
}
Expand Down Expand Up @@ -36,8 +42,8 @@ resource "aws_kinesis_firehose_delivery_stream" "streamalerts" {
content {
role_arn = aws_iam_role.firehose.arn
bucket_arn = local.bucket_arn
prefix = "alerts/dt=!{timestamp:yyyy-MM-dd-HH}/"
error_output_prefix = "alerts/!{firehose:error-output-type}/"
prefix = "alerts/parquet/dt=!{timestamp:yyyy-MM-dd-HH}/"
error_output_prefix = "alerts/parquet/!{firehose:error-output-type}/"
buffer_size = var.buffer_size
buffer_interval = var.buffer_interval

Expand Down
11 changes: 8 additions & 3 deletions terraform/modules/tf_kinesis_firehose_delivery_stream/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,12 @@
//

locals {
data_location = "s3://${var.prefix}-streamalert-data/${var.log_name}"
# Athena reads all data stored under 's3://bucketname/prefix/'. When the table's
# file format is Parquet, Athena throws "HIVE_CANNOT_OPEN_SPLIT" errors if any
# *.gz files are present under the same prefix.
# https://docs.aws.amazon.com/athena/latest/ug/tables-location-format.html
# So all Parquet-format data is saved to the S3 bucket under the "<log_name>/parquet" prefix.
data_location = "s3://${var.prefix}-streamalert-data/${var.log_name}/parquet"
ser_de_params_key = var.file_format == "parquet" ? "serialization.format" : "ignore.malformed.json"
ser_de_params_value = var.file_format == "parquet" ? "1" : "true"
}
Expand Down Expand Up @@ -34,8 +39,8 @@ resource "aws_kinesis_firehose_delivery_stream" "streamalert_data" {
content {
role_arn = var.role_arn
bucket_arn = "arn:aws:s3:::${var.s3_bucket_name}"
prefix = "${var.log_name}/dt=!{timestamp:yyyy-MM-dd-HH}/"
error_output_prefix = "${var.log_name}/!{firehose:error-output-type}/"
prefix = "${var.log_name}/parquet/dt=!{timestamp:yyyy-MM-dd-HH}/"
error_output_prefix = "${var.log_name}/parquet/!{firehose:error-output-type}/"
buffer_size = var.buffer_size
buffer_interval = var.buffer_interval

Expand Down

0 comments on commit f48532a

Please sign in to comment.