Added support for workgroup and made output location optional with Amazon Athena - closes #320
ankane · Sep 21, 2021 · c44ca45 · c44ca45
1 parent 65c46f0
commit c44ca45
Show file tree

Hide file tree

Showing 3 changed files with 25 additions and 14 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,7 +1,9 @@
 ## 2.4.6 (unreleased)
 
+- Added support for workgroup with Amazon Athena
 - Added casting for timestamp with time zone columns with Amazon Athena
 - Added support for setting credentials in config file with Amazon Athena
+- Made output location optional with Amazon Athena
 - Fixed casting error for `NULL` values with Amazon Athena
 
 ## 2.4.5 (2021-09-15)

diff --git a/README.md b/README.md
@@ -620,9 +620,12 @@ data_sources:
   my_source:
     adapter: athena
     database: database
+
+    # optional settings
     output_location: s3://some-bucket/
-    access_key_id: ...     # optional [unreleased]
-    secret_access_key: ... # optional [unreleased]
+    workgroup: primary
+    access_key_id: ...
+    secret_access_key: ...
 ```
 
 Here’s an example IAM policy:

diff --git a/lib/blazer/adapters/athena_adapter.rb b/lib/blazer/adapters/athena_adapter.rb
@@ -8,19 +8,25 @@ def run_statement(statement, comment)
         rows = []
         error = nil
 
+        query_options = {
+          query_string: statement,
+          # use token so we fetch cached results after query is run
+          client_request_token: Digest::MD5.hexdigest([statement, data_source.id].join("/")),
+          query_execution_context: {
+            database: database,
+          }
+        }
+
+        if settings["output_location"]
+          query_options[:result_configuration] = {output_location: settings["output_location"]}
+        end
+
+        if settings["workgroup"]
+          query_options[:work_group] = settings["workgroup"]
+        end
+
         begin
-          resp =
-            client.start_query_execution(
-              query_string: statement,
-              # use token so we fetch cached results after query is run
-              client_request_token: Digest::MD5.hexdigest([statement,data_source.id].join("/")),
-              query_execution_context: {
-                database: database,
-              },
-              result_configuration: {
-                output_location: settings["output_location"]
-              }
-            )
+          resp = client.start_query_execution(**query_options)
           query_execution_id = resp.query_execution_id
 
           timeout = data_source.timeout || 300