26 changes: 24 additions & 2 deletions README.md
@@ -44,10 +44,30 @@ df = wr.s3.read_parquet("s3://bucket/dataset/", dataset=True)
df = wr.athena.read_sql_query("SELECT * FROM my_table", database="my_db")

# Getting Redshift connection (SQLAlchemy) from Glue Catalog Connections
engine = wr.catalog.get_engine("my-redshift-connection")

# Retrieving the data from Amazon Redshift Spectrum
engine = wr.catalog.get_engine("my-redshift-connection")
df = wr.db.read_sql_query("SELECT * FROM external_schema.my_table", con=engine)

# Creating QuickSight Data Source and Dataset to reflect our new table
wr.quicksight.create_athena_data_source("athena-source", allowed_to_manage=["username"])
wr.quicksight.create_athena_dataset(
name="my-dataset",
database="my_db",
table="my_table",
data_source_name="athena-source",
allowed_to_manage=["username"]
)

# Getting MySQL connection (SQLAlchemy) from Glue Catalog Connections
# Load the data into MySQL
engine = wr.catalog.get_engine("my-mysql-connection")
wr.db.to_sql(df, engine, schema="test", name="my_table")

# Getting PostgreSQL connection (SQLAlchemy) from Glue Catalog Connections
# Load the data into PostgreSQL
engine = wr.catalog.get_engine("my-postgresql-connection")
wr.db.to_sql(df, engine, schema="test", name="my_table")

```

## [Read The Docs](https://aws-data-wrangler.readthedocs.io/)
@@ -80,13 +100,15 @@ df = wr.db.read_sql_query("SELECT * FROM external_schema.my_table", con=engine)
- [015 - EMR](https://github.com/awslabs/aws-data-wrangler/blob/master/tutorials/015%20-%20EMR.ipynb)
- [016 - EMR & Docker](https://github.com/awslabs/aws-data-wrangler/blob/master/tutorials/016%20-%20EMR%20%26%20Docker.ipynb)
- [017 - Partition Projection](https://github.com/awslabs/aws-data-wrangler/blob/master/tutorials/017%20-%20Partition%20Projection.ipynb)
- [018 - QuickSight](https://github.com/awslabs/aws-data-wrangler/blob/master/tutorials/018%20-%20QuickSight.ipynb)
- [**API Reference**](https://aws-data-wrangler.readthedocs.io/en/latest/api.html)
- [Amazon S3](https://aws-data-wrangler.readthedocs.io/en/latest/api.html#amazon-s3)
- [AWS Glue Catalog](https://aws-data-wrangler.readthedocs.io/en/latest/api.html#aws-glue-catalog)
- [Amazon Athena](https://aws-data-wrangler.readthedocs.io/en/latest/api.html#amazon-athena)
- [Databases (Redshift, PostgreSQL, MySQL)](https://aws-data-wrangler.readthedocs.io/en/latest/api.html#databases-redshift-postgresql-mysql)
- [EMR Cluster](https://aws-data-wrangler.readthedocs.io/en/latest/api.html#emr-cluster)
- [CloudWatch Logs](https://aws-data-wrangler.readthedocs.io/en/latest/api.html#cloudwatch-logs)
- [QuickSight](https://aws-data-wrangler.readthedocs.io/en/latest/api.html#quicksight)
- [**License**](https://github.com/awslabs/aws-data-wrangler/blob/master/LICENSE)
- [**Contributing**](https://github.com/awslabs/aws-data-wrangler/blob/master/CONTRIBUTING.md)
- [**Legacy Docs** (pre-1.0.0)](https://aws-data-wrangler.readthedocs.io/en/legacy/)
2 changes: 1 addition & 1 deletion awswrangler/__init__.py
@@ -7,7 +7,7 @@

import logging

from awswrangler import athena, catalog, cloudwatch, db, emr, exceptions, s3 # noqa
from awswrangler import athena, catalog, cloudwatch, db, emr, exceptions, quicksight, s3 # noqa
from awswrangler.__metadata__ import __description__, __license__, __title__, __version__ # noqa
from awswrangler._utils import get_account_id # noqa

28 changes: 28 additions & 0 deletions awswrangler/_data_types.py
@@ -114,6 +114,34 @@ def athena2redshift( # pylint: disable=too-many-branches,too-many-return-statem
raise exceptions.UnsupportedType(f"Unsupported Athena type: {dtype}") # pragma: no cover


def athena2quicksight(dtype: str) -> str: # pylint: disable=too-many-branches,too-many-return-statements
"""Athena to Quicksight data types conversion."""
dtype = dtype.lower()
if dtype == "smallint":
return "INTEGER"
if dtype in ("int", "integer"):
return "INTEGER"
if dtype == "bigint":
return "INTEGER"
if dtype == "float":
return "DECIMAL"
if dtype == "double":
return "DECIMAL"
if dtype in ("boolean", "bool"):
return "BOOLEAN"
if dtype in ("string", "char", "varchar"):
return "STRING"
if dtype == "timestamp":
return "DATETIME"
if dtype == "date":
return "DATETIME"
if dtype.startswith("decimal"):
return "DECIMAL"
if dtype in ("binary" or "varbinary"):
return "BIT"
raise exceptions.UnsupportedType(f"Unsupported Athena type: {dtype}") # pragma: no cover


def pyarrow2athena(dtype: pa.DataType) -> str: # pylint: disable=too-many-branches,too-many-return-statements
"""Pyarrow to Athena data types conversion."""
if pa.types.is_int8(dtype):
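A quick spot-check of the `athena2quicksight` mapping added above, as a hedged sketch (it assumes the function is imported from `awswrangler._data_types`, where this diff defines it):

```python
from awswrangler._data_types import athena2quicksight

# A few of the mappings defined above (not an exhaustive test).
assert athena2quicksight("bigint") == "INTEGER"
assert athena2quicksight("decimal(10,2)") == "DECIMAL"
assert athena2quicksight("varchar") == "STRING"
assert athena2quicksight("timestamp") == "DATETIME"
assert athena2quicksight("varbinary") == "BIT"  # relies on the ("binary", "varbinary") tuple fix above
```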
64 changes: 40 additions & 24 deletions awswrangler/catalog.py
@@ -439,8 +439,9 @@ def get_table_types(
dtypes: Dict[str, str] = {}
for col in response["Table"]["StorageDescriptor"]["Columns"]:
dtypes[col["Name"]] = col["Type"]
for par in response["Table"]["PartitionKeys"]:
dtypes[par["Name"]] = par["Type"]
if "PartitionKeys" in response["Table"]:
for par in response["Table"]["PartitionKeys"]:
dtypes[par["Name"]] = par["Type"]
return dtypes


@@ -527,6 +528,11 @@ def get_tables(
) -> Iterator[Dict[str, Any]]:
"""Get an iterator of tables.

Note
----
Do not filter using name_contains and name_prefix/name_suffix at the same time.
Only name_prefix and name_suffix can be combined.

Parameters
----------
catalog_id : str, optional
@@ -560,15 +566,17 @@
if catalog_id is not None:
args["CatalogId"] = catalog_id
if (name_prefix is not None) and (name_suffix is not None) and (name_contains is not None):
args["Expression"] = f"{name_prefix}.*{name_contains}.*{name_suffix}"
raise exceptions.InvalidArgumentCombination("Please, does not filter using name_contains and "
"name_prefix/name_suffix at the same time. Only "
"name_prefix and name_suffix can be combined together.")
elif (name_prefix is not None) and (name_suffix is not None):
args["Expression"] = f"{name_prefix}.*{name_suffix}"
args["Expression"] = f"{name_prefix}*{name_suffix}"
elif name_contains is not None:
args["Expression"] = f".*{name_contains}.*"
args["Expression"] = f"*{name_contains}*"
elif name_prefix is not None:
args["Expression"] = f"{name_prefix}.*"
args["Expression"] = f"{name_prefix}*"
elif name_suffix is not None:
args["Expression"] = f".*{name_suffix}"
args["Expression"] = f"*{name_suffix}"
if database is not None:
dbs: List[str] = [database]
else:
@@ -647,15 +655,21 @@ def tables(
tbls = tbls[:limit]

df_dict: Dict[str, List] = {"Database": [], "Table": [], "Description": [], "Columns": [], "Partitions": []}
for table in tbls:
df_dict["Database"].append(table["DatabaseName"])
df_dict["Table"].append(table["Name"])
if "Description" in table:
df_dict["Description"].append(table["Description"])
for tbl in tbls:
df_dict["Database"].append(tbl["DatabaseName"])
df_dict["Table"].append(tbl["Name"])
if "Description" in tbl:
df_dict["Description"].append(tbl["Description"])
else:
df_dict["Description"].append("")
df_dict["Columns"].append(", ".join([x["Name"] for x in table["StorageDescriptor"]["Columns"]]))
df_dict["Partitions"].append(", ".join([x["Name"] for x in table["PartitionKeys"]]))
if "Columns" in tbl["StorageDescriptor"]:
df_dict["Columns"].append(", ".join([x["Name"] for x in tbl["StorageDescriptor"]["Columns"]]))
else:
df_dict["Columns"].append("")
if "PartitionKeys" in tbl:
df_dict["Partitions"].append(", ".join([x["Name"] for x in tbl["PartitionKeys"]]))
else:
df_dict["Partitions"].append("")
return pd.DataFrame(data=df_dict)
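For context on the filter changes in `get_tables` above, here is a minimal usage sketch (the database and table names are hypothetical):

```python
import awswrangler as wr

# Filter expressions are now built with "*" wildcards (e.g. "sales_*_2020").
# Tables whose names start with "sales_" and end with "_2020":
for tbl in wr.catalog.get_tables(database="my_db", name_prefix="sales_", name_suffix="_2020"):
    print(tbl["Name"])

# Combining name_contains with name_prefix/name_suffix now raises InvalidArgumentCombination
# instead of silently building a malformed expression.
```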


@@ -771,14 +785,15 @@ def table(
df_dict["Comment"].append(col["Comment"])
else:
df_dict["Comment"].append("")
for col in tbl["PartitionKeys"]:
df_dict["Column Name"].append(col["Name"])
df_dict["Type"].append(col["Type"])
df_dict["Partition"].append(True)
if "Comment" in col:
df_dict["Comment"].append(col["Comment"])
else:
df_dict["Comment"].append("")
if "PartitionKeys" in tbl:
for col in tbl["PartitionKeys"]:
df_dict["Column Name"].append(col["Name"])
df_dict["Type"].append(col["Type"])
df_dict["Partition"].append(True)
if "Comment" in col:
df_dict["Comment"].append(col["Comment"])
else:
df_dict["Comment"].append("")
return pd.DataFrame(data=df_dict)


@@ -1692,8 +1707,9 @@ def get_columns_comments(
comments: Dict[str, str] = {}
for c in response["Table"]["StorageDescriptor"]["Columns"]:
comments[c["Name"]] = c["Comment"]
for p in response["Table"]["PartitionKeys"]:
comments[p["Name"]] = p["Comment"]
if "PartitionKeys" in response["Table"]:
for p in response["Table"]["PartitionKeys"]:
comments[p["Name"]] = p["Comment"]
return comments


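The new `"PartitionKeys" in ...` guards mean the catalog helpers above no longer assume every Glue table carries partition metadata. A minimal sketch (hypothetical names; it assumes the usual `database`/`table` arguments these functions take):

```python
import awswrangler as wr

# Each of these could previously raise KeyError for a table created without partition keys.
dtypes = wr.catalog.get_table_types(database="my_db", table="my_unpartitioned_table")
details = wr.catalog.table(database="my_db", table="my_unpartitioned_table")  # "Partition" column is all False
comments = wr.catalog.get_columns_comments(database="my_db", table="my_unpartitioned_table")
```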
43 changes: 43 additions & 0 deletions awswrangler/quicksight/__init__.py
@@ -0,0 +1,43 @@
"""Amazon QuickSight Module."""

from awswrangler.quicksight._cancel import cancel_ingestion # noqa
from awswrangler.quicksight._create import create_athena_data_source, create_athena_dataset, create_ingestion # noqa
from awswrangler.quicksight._delete import ( # noqa
delete_all_dashboards,
delete_all_data_sources,
delete_all_datasets,
delete_all_templates,
delete_dashboard,
delete_data_source,
delete_dataset,
delete_template,
)
from awswrangler.quicksight._describe import ( # noqa
describe_dashboard,
describe_data_source,
describe_data_source_permissions,
describe_dataset,
describe_ingestion,
)
from awswrangler.quicksight._get_list import ( # noqa
get_dashboard_id,
get_dashboard_ids,
get_data_source_arn,
get_data_source_arns,
get_data_source_id,
get_data_source_ids,
get_dataset_id,
get_dataset_ids,
get_template_id,
get_template_ids,
list_dashboards,
list_data_sources,
list_datasets,
list_group_memberships,
list_groups,
list_iam_policy_assignments,
list_iam_policy_assignments_for_user,
list_ingestions,
list_templates,
list_users,
)
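A short end-to-end sketch of the new module, mirroring the README snippet above (resource names are placeholders; `list_datasets`/`get_dataset_id` are assumed to work against the default account and session):

```python
import awswrangler as wr

# Create the data source first; the dataset below references it by name.
wr.quicksight.create_athena_data_source("athena-source", allowed_to_manage=["username"])
wr.quicksight.create_athena_dataset(
    name="my-dataset",
    database="my_db",
    table="my_table",
    data_source_name="athena-source",
    allowed_to_manage=["username"],
)

print(wr.quicksight.list_datasets())
print(wr.quicksight.get_dataset_id(name="my-dataset"))
```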
58 changes: 58 additions & 0 deletions awswrangler/quicksight/_cancel.py
@@ -0,0 +1,58 @@
"""Amazon QuickSight Cancel Module."""

import logging
from typing import Optional

import boto3 # type: ignore

from awswrangler import _utils, exceptions
from awswrangler.quicksight._get_list import get_dataset_id

_logger: logging.Logger = logging.getLogger(__name__)


def cancel_ingestion(
ingestion_id: Optional[str] = None,
dataset_name: Optional[str] = None,
dataset_id: Optional[str] = None,
account_id: Optional[str] = None,
boto3_session: Optional[boto3.Session] = None,
) -> None:
"""Cancel an ongoing ingestion of data into SPICE.

Note
----
You must pass a value for either ``dataset_name`` or ``dataset_id``.

Parameters
----------
ingestion_id : str
Ingestion ID.
dataset_name : str, optional
Dataset name.
dataset_id : str, optional
Dataset ID.
account_id : str, optional
If None, the account ID will be inferred from your boto3 session.
boto3_session : boto3.Session(), optional
Boto3 Session. The default boto3 session will be used if boto3_session receives None.

Returns
-------
None
None.

Examples
--------
>>> import awswrangler as wr
>>> wr.quicksight.cancel_ingestion(ingestion_id="...", dataset_name="...")
"""
if (dataset_name is None) and (dataset_id is None):
raise exceptions.InvalidArgument("You must pass a value for either dataset_name or dataset_id.")
session: boto3.Session = _utils.ensure_session(session=boto3_session)
if account_id is None:
account_id = _utils.get_account_id(boto3_session=session)
if (dataset_id is None) and (dataset_name is not None):
dataset_id = get_dataset_id(name=dataset_name, account_id=account_id, boto3_session=session)
client: boto3.client = _utils.client(service_name="quicksight", session=session)
client.cancel_ingestion(IngestionId=ingestion_id, AwsAccountId=account_id, DataSetId=dataset_id)
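Beyond the docstring example, a slightly fuller sketch (it assumes `list_ingestions` accepts the same `dataset_name` argument and returns the raw QuickSight ingestion dicts with `IngestionId`/`IngestionStatus` keys):

```python
import awswrangler as wr

# Cancel every ingestion that is still running for the dataset.
for ingestion in wr.quicksight.list_ingestions(dataset_name="my-dataset"):
    if ingestion.get("IngestionStatus") == "RUNNING":
        wr.quicksight.cancel_ingestion(
            ingestion_id=ingestion["IngestionId"],
            dataset_name="my-dataset",
        )
```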