In [None]:
import boto3

In [None]:
import os

In [None]:
# Use us-east-1 only when AWS_DEFAULT_REGION is not already set in the environment.
os.environ.setdefault('AWS_DEFAULT_REGION', 'us-east-1')

In [None]:
# Athena client shared by every cell below.
athena_client = boto3.client('athena')

In [None]:
# IPython help: show the documentation for list_work_groups.
athena_client.list_work_groups?

In [None]:
# Display the raw list_work_groups response.
athena_client.list_work_groups()

* Listing Amazon Athena Databases using Python boto3

In [None]:
# IPython help: show the documentation for list_databases.
athena_client.list_databases?

In [None]:
# Keep the list_databases response for the 'AwsDataCatalog' catalog; later cells read its 'DatabaseList' key.
databases = athena_client.list_databases(CatalogName='AwsDataCatalog')

In [None]:
# Inspect the Python type of the response object.
type(databases)

In [None]:
# Inspect the Python type of the 'DatabaseList' entry of the response.
type(databases['DatabaseList'])

In [None]:
# Names of all databases in the response captured above.
[
    db_entry['Name']
    for db_entry in databases['DatabaseList']
]

* Listing Amazon Athena Database Tables using Python boto3

In [None]:
# IPython help: show the documentation for list_table_metadata.
athena_client.list_table_metadata?

In [None]:
# Display the raw list_table_metadata response for the retail_db database.
athena_client.list_table_metadata(CatalogName='AwsDataCatalog', DatabaseName='retail_db')

In [None]:
# Keep the response so later cells can read its 'TableMetadataList' key.
tables = athena_client.list_table_metadata(CatalogName='AwsDataCatalog', DatabaseName='retail_db')

In [None]:
# Inspect the Python type of the response object.
type(tables)

In [None]:
# Display the per-table metadata entries from the response.
tables['TableMetadataList']

In [None]:
# Names of all tables in the response captured above.
[
    table_entry['Name']
    for table_entry in tables['TableMetadataList']
]

In [None]:
# IPython help: show the documentation for get_table_metadata.
athena_client.get_table_metadata?

In [None]:
# Display the raw get_table_metadata response for retail_db.orders.
athena_client.get_table_metadata(CatalogName='AwsDataCatalog', DatabaseName='retail_db', TableName='orders')

In [None]:
# Keep the response so later cells can read its 'TableMetadata' key.
table_metadata = athena_client.get_table_metadata(CatalogName='AwsDataCatalog', DatabaseName='retail_db', TableName='orders')

In [None]:
# Display the full response captured above.
table_metadata

In [None]:
# Display the 'TableType' field of the orders table metadata.
table_metadata['TableMetadata']['TableType']

In [None]:
# Display the 'Columns' field of the orders table metadata.
table_metadata['TableMetadata']['Columns']

In [None]:
# Display the 'location' parameter of the orders table metadata.
table_metadata['TableMetadata']['Parameters']['location']

In [None]:
# Location of every table in the listing.
# Entries without 'Parameters' or without a 'location' parameter (e.g. views)
# yield None instead of raising KeyError.
[
    table.get('Parameters', {}).get('location')
    for table in tables['TableMetadataList']
]

* Run Queries on Athena Tables using Python boto3

In [None]:
# IPython help: show the documentation for start_query_execution.
athena_client.start_query_execution?

In [None]:
# Display the raw list_work_groups response before running queries.
athena_client.list_work_groups()

In [None]:
# Display the get_work_group response for the 'primary' workgroup.
athena_client.get_work_group(WorkGroup='primary')

In [None]:
# Submit the query and keep the response; later cells read its 'QueryExecutionId'.
# No ResultConfiguration is passed here; presumably the workgroup's configured
# output location is used, so confirm against the workgroup settings.
query_execution = athena_client.start_query_execution(QueryString='SELECT count(*) FROM myretail.orders')

In [None]:
# Display the start_query_execution response.
query_execution

In [None]:
# Display the execution details of the query submitted above.
athena_client.get_query_execution(QueryExecutionId=query_execution['QueryExecutionId'])

In [None]:
!aws s3 ls s3://itvathena/wgprimary/79e570cb-6d51-4408-84f4-bc81556ec9d0.csv

* Process Athena Query Results using Python

In [None]:
# Display the raw get_table_metadata response for myretail.orders.
athena_client.get_table_metadata(CatalogName='AwsDataCatalog', DatabaseName='myretail', TableName='orders')

In [None]:
# SQL text: number of rows in myretail.orders per order_status.
query_str = """
    SELECT order_status, count(*) AS order_count
    FROM myretail.orders
    GROUP BY order_status
"""

In [None]:
# Submit query_str and keep the response; later cells read its 'QueryExecutionId'.
query_execution = athena_client.start_query_execution(QueryString=query_str)

In [None]:
# Display the execution details of the query submitted above.
athena_client.get_query_execution(QueryExecutionId=query_execution['QueryExecutionId'])

In [None]:
import time

# get_query_results only works once the query has finished, so poll the
# execution state until it leaves QUEUED/RUNNING before fetching results.
# This keeps the cell working under "Restart Kernel -> Run All".
while True:
    query_state = athena_client.get_query_execution(
        QueryExecutionId=query_execution['QueryExecutionId']
    )['QueryExecution']['Status']['State']
    if query_state not in ('QUEUED', 'RUNNING'):
        break
    time.sleep(1)

# If the query did not succeed, get_query_results raises as it did before.
query_results = athena_client.get_query_results(
    QueryExecutionId=query_execution['QueryExecutionId']
)

In [None]:
# Display the full get_query_results response.
query_results

In [None]:
# Display the 'ResultSet' part of the response.
query_results['ResultSet']

In [None]:
# Display the 'Rows' entries of the result set.
query_results['ResultSet']['Rows']

* Custom Location for Athena query results using Python boto3

```shell
pip install pandas
pip install fsspec
pip install s3fs
```

In [None]:
# SQL text: number of rows in myretail.orders per order_status.
query_str = """
    SELECT order_status, count(*) AS order_count
    FROM myretail.orders
    GROUP BY order_status
"""

In [None]:
# IPython help: show the documentation for start_query_execution.
athena_client.start_query_execution?

In [None]:
# Submit query_str with an explicit S3 output location for the results.
query_execution = athena_client.start_query_execution(
    ResultConfiguration={
        'OutputLocation': 's3://itv-retail/myretail/order_status_count_boto3'
    },
    QueryString=query_str,
)

In [None]:
# Display the execution details of the query submitted above.
athena_client.get_query_execution(QueryExecutionId=query_execution['QueryExecutionId'])

In [None]:
!aws s3 ls s3://itv-retail/myretail/order_status_count_boto3/8d388c59-cd56-4a1f-850e-c4225fcaed74.csv

In [None]:
import time

# get_query_results only works once the query has finished, so poll the
# execution state until it leaves QUEUED/RUNNING before fetching results.
# This keeps the cell working under "Restart Kernel -> Run All".
while True:
    query_state = athena_client.get_query_execution(
        QueryExecutionId=query_execution['QueryExecutionId']
    )['QueryExecution']['Status']['State']
    if query_state not in ('QUEUED', 'RUNNING'):
        break
    time.sleep(1)

# If the query did not succeed, get_query_results raises as it did before.
query_results = athena_client.get_query_results(
    QueryExecutionId=query_execution['QueryExecutionId']
)

In [None]:
# Display the 'Rows' entries of the result set.
query_results['ResultSet']['Rows']

In [None]:
import pandas as pd

In [None]:
# Load the CSV that Athena wrote for the query submitted above.
# The S3 path comes from the current execution's OutputLocation instead of a
# hard-coded execution id, so re-running the notebook reads the file it just produced.
data = pd.read_csv(
    athena_client.get_query_execution(
        QueryExecutionId=query_execution['QueryExecutionId']
    )['QueryExecution']['ResultConfiguration']['OutputLocation']
)

In [None]:
# Inspect the Python type returned by pd.read_csv.
type(data)

In [None]:
# Display the loaded query results.
data

* Running CTAS against Athena using Python boto3

In [90]:
# CTAS statement: create myretail.orders_part from myretail.orders as
# comma-delimited TEXTFILE data under s3://itv-retail/myretail/orders_part/,
# partitioned by order_month. order_month is the first 7 characters of
# order_date with '-' removed, cast to INT.
query_str = """
CREATE TABLE myretail.orders_part
WITH (
    format = 'TEXTFILE',
    external_location = 's3://itv-retail/myretail/orders_part/',
    field_delimiter = ',',
    partitioned_by = ARRAY['order_month']
)
AS
SELECT o.*,
    cast(replace(substring(order_date, 1, 7), '-', '') AS INT) AS order_month
FROM myretail.orders AS o
"""

In [94]:
# Submit the CTAS statement and keep the response for the status check below.
query_execution = athena_client.start_query_execution(QueryString=query_str)

In [96]:
# Display the execution details (including Status.State) of the CTAS statement.
athena_client.get_query_execution(QueryExecutionId=query_execution['QueryExecutionId'])

{'QueryExecution': {'QueryExecutionId': '323c6f93-f175-4ee3-807f-9bd91f490e07',
  'Query': "CREATE TABLE myretail.orders_part\nWITH (\n    format = 'TEXTFILE',\n    external_location = 's3://itv-retail/myretail/orders_part/',\n    field_delimiter = ',',\n    partitioned_by = ARRAY['order_month']\n)\nAS\nSELECT o.*,\n    cast(replace(substring(order_date, 1, 7), '-', '') AS INT) AS order_month\nFROM myretail.orders AS o",
  'StatementType': 'DDL',
  'ResultConfiguration': {'OutputLocation': 's3://itvathena/wgprimary/tables/323c6f93-f175-4ee3-807f-9bd91f490e07'},
  'QueryExecutionContext': {},
  'Status': {'State': 'SUCCEEDED',
   'SubmissionDateTime': datetime.datetime(2021, 9, 26, 9, 43, 30, 629000, tzinfo=tzlocal()),
   'CompletionDateTime': datetime.datetime(2021, 9, 26, 9, 43, 35, 287000, tzinfo=tzlocal())},
  'Statistics': {'EngineExecutionTimeInMillis': 4333,
   'DataScannedInBytes': 327238,
   'DataManifestLocation': 's3://itvathena/wgprimary/tables/323c6f93-f175-4ee3-807f-9bd91f