diff --git a/README.md b/README.md index cabd20ea3..cb27ae253 100644 --- a/README.md +++ b/README.md @@ -44,10 +44,30 @@ df = wr.s3.read_parquet("s3://bucket/dataset/", dataset=True) df = wr.athena.read_sql_query("SELECT * FROM my_table", database="my_db") # Getting Redshift connection (SQLAlchemy) from Glue Catalog Connections -engine = wr.catalog.get_engine("my-redshift-connection") - # Retrieving the data from Amazon Redshift Spectrum +engine = wr.catalog.get_engine("my-redshift-connection") df = wr.db.read_sql_query("SELECT * FROM external_schema.my_table", con=engine) + +# Creating QuickSight Data Source and Dataset to reflect our new table +wr.quicksight.create_athena_data_source("athena-source", allowed_to_manage=["username"]) +wr.quicksight.create_athena_dataset( + name="my-dataset", + database="my_db", + table="my_table", + data_source_name="athena-source", + allowed_to_manage=["username"] +) + +# Getting MySQL connection (SQLAlchemy) from Glue Catalog Connections +# Load the data into MySQL +engine = wr.catalog.get_engine("my-mysql-connection") +wr.db.to_sql(df, engine, schema="test", name="my_table") + +# Getting PostgreSQL connection (SQLAlchemy) from Glue Catalog Connections +# Load the data into PostgreSQL +engine = wr.catalog.get_engine("my-postgresql-connection") +wr.db.to_sql(df, engine, schema="test", name="my_table") + ``` ## [Read The Docs](https://aws-data-wrangler.readthedocs.io/) @@ -80,6 +100,7 @@ df = wr.db.read_sql_query("SELECT * FROM external_schema.my_table", con=engine) - [015 - EMR](https://github.com/awslabs/aws-data-wrangler/blob/master/tutorials/015%20-%20EMR.ipynb) - [016 - EMR & Docker](https://github.com/awslabs/aws-data-wrangler/blob/master/tutorials/016%20-%20EMR%20%26%20Docker.ipynb) - [017 - Partition Projection](https://github.com/awslabs/aws-data-wrangler/blob/master/tutorials/017%20-%20Partition%20Projection.ipynb) + - [018 - 
QuickSight](https://github.com/awslabs/aws-data-wrangler/blob/master/tutorials/018%20-%20QuickSight.ipynb) - [**API Reference**](https://aws-data-wrangler.readthedocs.io/en/latest/api.html) - [Amazon S3](https://aws-data-wrangler.readthedocs.io/en/latest/api.html#amazon-s3) - [AWS Glue Catalog](https://aws-data-wrangler.readthedocs.io/en/latest/api.html#aws-glue-catalog) @@ -87,6 +108,7 @@ df = wr.db.read_sql_query("SELECT * FROM external_schema.my_table", con=engine) - [Databases (Redshift, PostgreSQL, MySQL)](https://aws-data-wrangler.readthedocs.io/en/latest/api.html#databases-redshift-postgresql-mysql) - [EMR Cluster](https://aws-data-wrangler.readthedocs.io/en/latest/api.html#emr-cluster) - [CloudWatch Logs](https://aws-data-wrangler.readthedocs.io/en/latest/api.html#cloudwatch-logs) + - [QuickSight](https://aws-data-wrangler.readthedocs.io/en/latest/api.html#quicksight) - [**License**](https://github.com/awslabs/aws-data-wrangler/blob/master/LICENSE) - [**Contributing**](https://github.com/awslabs/aws-data-wrangler/blob/master/CONTRIBUTING.md) - [**Legacy Docs** (pre-1.0.0)](https://aws-data-wrangler.readthedocs.io/en/legacy/) diff --git a/awswrangler/__init__.py b/awswrangler/__init__.py index 4413ab5f4..9aff3abcd 100644 --- a/awswrangler/__init__.py +++ b/awswrangler/__init__.py @@ -7,7 +7,7 @@ import logging -from awswrangler import athena, catalog, cloudwatch, db, emr, exceptions, s3 # noqa +from awswrangler import athena, catalog, cloudwatch, db, emr, exceptions, quicksight, s3 # noqa from awswrangler.__metadata__ import __description__, __license__, __title__, __version__ # noqa from awswrangler._utils import get_account_id # noqa diff --git a/awswrangler/_data_types.py b/awswrangler/_data_types.py index 50cc0e372..e0ffcf208 100644 --- a/awswrangler/_data_types.py +++ b/awswrangler/_data_types.py @@ -114,6 +114,34 @@ def athena2redshift( # pylint: disable=too-many-branches,too-many-return-statem raise exceptions.UnsupportedType(f"Unsupported Athena 
type: {dtype}") # pragma: no cover +def athena2quicksight(dtype: str) -> str: # pylint: disable=too-many-branches,too-many-return-statements + """Athena to Quicksight data types conversion.""" + dtype = dtype.lower() + if dtype == "smallint": + return "INTEGER" + if dtype in ("int", "integer"): + return "INTEGER" + if dtype == "bigint": + return "INTEGER" + if dtype == "float": + return "DECIMAL" + if dtype == "double": + return "DECIMAL" + if dtype in ("boolean", "bool"): + return "BOOLEAN" + if dtype in ("string", "char", "varchar"): + return "STRING" + if dtype == "timestamp": + return "DATETIME" + if dtype == "date": + return "DATETIME" + if dtype.startswith("decimal"): + return "DECIMAL" + if dtype in ("binary", "varbinary"): + return "BIT" + raise exceptions.UnsupportedType(f"Unsupported Athena type: {dtype}") # pragma: no cover + + def pyarrow2athena(dtype: pa.DataType) -> str: # pylint: disable=too-many-branches,too-many-return-statements """Pyarrow to Athena data types conversion.""" if pa.types.is_int8(dtype): diff --git a/awswrangler/catalog.py b/awswrangler/catalog.py index 5ea578d20..aa2f34b85 100644 --- a/awswrangler/catalog.py +++ b/awswrangler/catalog.py @@ -439,8 +439,9 @@ def get_table_types( dtypes: Dict[str, str] = {} for col in response["Table"]["StorageDescriptor"]["Columns"]: dtypes[col["Name"]] = col["Type"] - for par in response["Table"]["PartitionKeys"]: - dtypes[par["Name"]] = par["Type"] + if "PartitionKeys" in response["Table"]: + for par in response["Table"]["PartitionKeys"]: + dtypes[par["Name"]] = par["Type"] return dtypes @@ -527,6 +528,11 @@ def get_tables( ) -> Iterator[Dict[str, Any]]: """Get an iterator of tables. + Note + ---- + Please do not filter using name_contains and name_prefix/name_suffix at the same time. + Only name_prefix and name_suffix can be combined together. 
+ Parameters ---------- catalog_id : str, optional @@ -560,15 +566,17 @@ def get_tables( if catalog_id is not None: args["CatalogId"] = catalog_id if (name_prefix is not None) and (name_suffix is not None) and (name_contains is not None): - args["Expression"] = f"{name_prefix}.*{name_contains}.*{name_suffix}" + raise exceptions.InvalidArgumentCombination("Please, does not filter using name_contains and " + "name_prefix/name_suffix at the same time. Only " + "name_prefix and name_suffix can be combined together.") elif (name_prefix is not None) and (name_suffix is not None): - args["Expression"] = f"{name_prefix}.*{name_suffix}" + args["Expression"] = f"{name_prefix}*{name_suffix}" elif name_contains is not None: - args["Expression"] = f".*{name_contains}.*" + args["Expression"] = f"*{name_contains}*" elif name_prefix is not None: - args["Expression"] = f"{name_prefix}.*" + args["Expression"] = f"{name_prefix}*" elif name_suffix is not None: - args["Expression"] = f".*{name_suffix}" + args["Expression"] = f"*{name_suffix}" if database is not None: dbs: List[str] = [database] else: @@ -647,15 +655,21 @@ def tables( tbls = tbls[:limit] df_dict: Dict[str, List] = {"Database": [], "Table": [], "Description": [], "Columns": [], "Partitions": []} - for table in tbls: - df_dict["Database"].append(table["DatabaseName"]) - df_dict["Table"].append(table["Name"]) - if "Description" in table: - df_dict["Description"].append(table["Description"]) + for tbl in tbls: + df_dict["Database"].append(tbl["DatabaseName"]) + df_dict["Table"].append(tbl["Name"]) + if "Description" in tbl: + df_dict["Description"].append(tbl["Description"]) else: df_dict["Description"].append("") - df_dict["Columns"].append(", ".join([x["Name"] for x in table["StorageDescriptor"]["Columns"]])) - df_dict["Partitions"].append(", ".join([x["Name"] for x in table["PartitionKeys"]])) + if "Columns" in tbl["StorageDescriptor"]: + df_dict["Columns"].append(", ".join([x["Name"] for x in 
tbl["StorageDescriptor"]["Columns"]])) + else: + df_dict["Columns"].append("") + if "PartitionKeys" in tbl: + df_dict["Partitions"].append(", ".join([x["Name"] for x in tbl["PartitionKeys"]])) + else: + df_dict["Partitions"].append("") return pd.DataFrame(data=df_dict) @@ -771,14 +785,15 @@ def table( df_dict["Comment"].append(col["Comment"]) else: df_dict["Comment"].append("") - for col in tbl["PartitionKeys"]: - df_dict["Column Name"].append(col["Name"]) - df_dict["Type"].append(col["Type"]) - df_dict["Partition"].append(True) - if "Comment" in col: - df_dict["Comment"].append(col["Comment"]) - else: - df_dict["Comment"].append("") + if "PartitionKeys" in tbl: + for col in tbl["PartitionKeys"]: + df_dict["Column Name"].append(col["Name"]) + df_dict["Type"].append(col["Type"]) + df_dict["Partition"].append(True) + if "Comment" in col: + df_dict["Comment"].append(col["Comment"]) + else: + df_dict["Comment"].append("") return pd.DataFrame(data=df_dict) @@ -1692,8 +1707,9 @@ def get_columns_comments( comments: Dict[str, str] = {} for c in response["Table"]["StorageDescriptor"]["Columns"]: comments[c["Name"]] = c["Comment"] - for p in response["Table"]["PartitionKeys"]: - comments[p["Name"]] = p["Comment"] + if "PartitionKeys" in response["Table"]: + for p in response["Table"]["PartitionKeys"]: + comments[p["Name"]] = p["Comment"] return comments diff --git a/awswrangler/quicksight/__init__.py b/awswrangler/quicksight/__init__.py new file mode 100644 index 000000000..47b1f0b8a --- /dev/null +++ b/awswrangler/quicksight/__init__.py @@ -0,0 +1,43 @@ +"""Amazon QuickSight Module.""" + +from awswrangler.quicksight._cancel import cancel_ingestion # noqa +from awswrangler.quicksight._create import create_athena_data_source, create_athena_dataset, create_ingestion # noqa +from awswrangler.quicksight._delete import ( # noqa + delete_all_dashboards, + delete_all_data_sources, + delete_all_datasets, + delete_all_templates, + delete_dashboard, + delete_data_source, + 
delete_dataset, + delete_template, +) +from awswrangler.quicksight._describe import ( # noqa + describe_dashboard, + describe_data_source, + describe_data_source_permissions, + describe_dataset, + describe_ingestion, +) +from awswrangler.quicksight._get_list import ( # noqa + get_dashboard_id, + get_dashboard_ids, + get_data_source_arn, + get_data_source_arns, + get_data_source_id, + get_data_source_ids, + get_dataset_id, + get_dataset_ids, + get_template_id, + get_template_ids, + list_dashboards, + list_data_sources, + list_datasets, + list_group_memberships, + list_groups, + list_iam_policy_assignments, + list_iam_policy_assignments_for_user, + list_ingestions, + list_templates, + list_users, +) diff --git a/awswrangler/quicksight/_cancel.py b/awswrangler/quicksight/_cancel.py new file mode 100644 index 000000000..cf27cdf45 --- /dev/null +++ b/awswrangler/quicksight/_cancel.py @@ -0,0 +1,58 @@ +"""Amazon QuickSight Cancel Module.""" + +import logging +from typing import Optional + +import boto3 # type: ignore + +from awswrangler import _utils, exceptions +from awswrangler.quicksight._get_list import get_dataset_id + +_logger: logging.Logger = logging.getLogger(__name__) + + +def cancel_ingestion( + ingestion_id: Optional[str] = None, + dataset_name: Optional[str] = None, + dataset_id: Optional[str] = None, + account_id: Optional[str] = None, + boto3_session: Optional[boto3.Session] = None, +) -> None: + """Cancel an ongoing ingestion of data into SPICE. + + Note + ---- + You must pass a not None value for ``dataset_name`` or ``dataset_id`` argument. + + Parameters + ---------- + ingestion_id : str + Ingestion ID. + dataset_name : str, optional + Dataset name. + dataset_id : str, optional + Dataset ID. + account_id : str, optional + If None, the account ID will be inferred from your boto3 session. + boto3_session : boto3.Session(), optional + Boto3 Session. The default boto3 session will be used if boto3_session receive None. + + Returns + ------- + None + None. 
+ + Examples + -------- + >>> import awswrangler as wr + >>> wr.quicksight.cancel_ingestion(ingestion_id="...", dataset_name="...") + """ + if (dataset_name is None) and (dataset_id is None): + raise exceptions.InvalidArgument("You must pass a not None name or dataset_id argument.") + session: boto3.Session = _utils.ensure_session(session=boto3_session) + if account_id is None: + account_id = _utils.get_account_id(boto3_session=session) + if (dataset_id is None) and (dataset_name is not None): + dataset_id = get_dataset_id(name=dataset_name, account_id=account_id, boto3_session=session) + client: boto3.client = _utils.client(service_name="quicksight", session=session) + client.cancel_ingestion(IngestionId=ingestion_id, AwsAccountId=account_id, DataSetId=dataset_id) diff --git a/awswrangler/quicksight/_create.py b/awswrangler/quicksight/_create.py new file mode 100644 index 000000000..d659e2ce1 --- /dev/null +++ b/awswrangler/quicksight/_create.py @@ -0,0 +1,390 @@ +"""Amazon QuickSight Create Module.""" + +import logging +import uuid +from typing import Any, Dict, List, Optional, Union + +import boto3 # type: ignore + +from awswrangler import _utils, exceptions +from awswrangler.quicksight._get_list import get_data_source_arn, get_dataset_id +from awswrangler.quicksight._utils import extract_athena_query_columns, extract_athena_table_columns + +_logger: logging.Logger = logging.getLogger(__name__) + +_ALLOWED_ACTIONS: Dict[str, Dict[str, List[str]]] = { + "data_source": { + "allowed_to_use": [ + "quicksight:DescribeDataSource", + "quicksight:DescribeDataSourcePermissions", + "quicksight:PassDataSource", + ], + "allowed_to_manage": [ + "quicksight:DescribeDataSource", + "quicksight:DescribeDataSourcePermissions", + "quicksight:PassDataSource", + "quicksight:UpdateDataSource", + "quicksight:DeleteDataSource", + "quicksight:UpdateDataSourcePermissions", + ], + }, + "dataset": { + "allowed_to_use": [ + "quicksight:DescribeDataSet", + 
"quicksight:DescribeDataSetPermissions", + "quicksight:PassDataSet", + "quicksight:DescribeIngestion", + "quicksight:ListIngestions", + ], + "allowed_to_manage": [ + "quicksight:DescribeDataSet", + "quicksight:DescribeDataSetPermissions", + "quicksight:PassDataSet", + "quicksight:DescribeIngestion", + "quicksight:ListIngestions", + "quicksight:UpdateDataSet", + "quicksight:DeleteDataSet", + "quicksight:CreateIngestion", + "quicksight:CancelIngestion", + "quicksight:UpdateDataSetPermissions", + ], + }, +} + + +def _generate_principal(user_name: str, account_id: str, region: str) -> str: + user_name = user_name if "/" in user_name else f"default/{user_name}" + return f"arn:aws:quicksight:{region}:{account_id}:user/{user_name}" + + +def _generate_permissions( + resource: str, + account_id: str, + boto3_session: boto3.Session, + allowed_to_use: Optional[List[str]] = None, + allowed_to_manage: Optional[List[str]] = None, +) -> List[Dict[str, Union[str, List[str]]]]: + permissions: List[Dict[str, Union[str, List[str]]]] = [] + if (allowed_to_use is None) and (allowed_to_manage is None): + return permissions + + # Forcing same principal not be in both lists at the same time. 
+ if (allowed_to_use is not None) and (allowed_to_manage is not None): + allowed_to_use = list(set(allowed_to_use) - set(allowed_to_manage)) + + region: str = _utils.get_region_from_session(boto3_session=boto3_session) + if allowed_to_use is not None: + permissions += [ + { + "Principal": _generate_principal(user_name=user_name, account_id=account_id, region=region), + "Actions": _ALLOWED_ACTIONS[resource]["allowed_to_use"], + } + for user_name in allowed_to_use + ] + if allowed_to_manage is not None: + permissions += [ + { + "Principal": _generate_principal(user_name=user_name, account_id=account_id, region=region), + "Actions": _ALLOWED_ACTIONS[resource]["allowed_to_manage"], + } + for user_name in allowed_to_manage + ] + return permissions + + +def _generate_transformations( + rename_columns: Optional[Dict[str, str]], cast_columns_types: Optional[Dict[str, str]] +) -> List[Dict[str, Dict[str, Any]]]: + trans: List[Dict[str, Dict[str, Any]]] = [] + if rename_columns is not None: + for k, v in rename_columns.items(): + trans.append({"RenameColumnOperation": {"ColumnName": k, "NewColumnName": v}}) + if cast_columns_types is not None: + for k, v in cast_columns_types.items(): + trans.append({"CastColumnTypeOperation": {"ColumnName": k, "NewColumnType": v.upper()}}) + return trans + + +def create_athena_data_source( + name: str, + workgroup: str = "primary", + allowed_to_use: Optional[List[str]] = None, + allowed_to_manage: Optional[List[str]] = None, + tags: Optional[Dict[str, str]] = None, + account_id: Optional[str] = None, + boto3_session: Optional[boto3.Session] = None, +) -> None: + """Create a QuickSight data source pointing to an Athena/Workgroup. + + Note + ---- + You will not be able to see the data source in the console + if you do not pass your user to one of the ``allowed_*`` arguments. + + Parameters + ---------- + name : str + Data source name. + workgroup : str + Athena workgroup. 
+ tags : Dict[str, str], optional + Key/Value collection to put on the data source. + e.g. {"foo": "boo", "bar": "xoo"}) + allowed_to_use : optional + List of principals that will be allowed to see and use the data source. + e.g. ["John"] + allowed_to_manage : optional + List of principals that will be allowed to see, use, update and delete the data source. + e.g. ["Mary"] + account_id : str, optional + If None, the account ID will be inferred from your boto3 session. + boto3_session : boto3.Session(), optional + Boto3 Session. The default boto3 session will be used if boto3_session receive None. + + Returns + ------- + None + None. + + Examples + -------- + >>> import awswrangler as wr + >>> wr.quicksight.create_athena_data_source( + ... name="...", + ... allowed_to_manage=["john"] + ... ) + """ + session: boto3.Session = _utils.ensure_session(session=boto3_session) + client: boto3.client = _utils.client(service_name="quicksight", session=session) + if account_id is None: + account_id = _utils.get_account_id(boto3_session=session) + args: Dict[str, Any] = { + "AwsAccountId": account_id, + "DataSourceId": name, + "Name": name, + "Type": "ATHENA", + "DataSourceParameters": {"AthenaParameters": {"WorkGroup": workgroup}}, + "SslProperties": {"DisableSsl": True}, + } + permissions: List[Dict[str, Union[str, List[str]]]] = _generate_permissions( + resource="data_source", + account_id=account_id, + boto3_session=session, + allowed_to_use=allowed_to_use, + allowed_to_manage=allowed_to_manage, + ) + if permissions: + args["Permissions"] = permissions + if tags is not None: + _tags: List[Dict[str, str]] = [{"Key": k, "Value": v} for k, v in tags.items()] + args["Tags"] = _tags + client.create_data_source(**args) + + +def create_athena_dataset( + name: str, + database: Optional[str] = None, + table: Optional[str] = None, + sql: Optional[str] = None, + sql_name: str = "CustomSQL", + data_source_name: Optional[str] = None, + data_source_arn: Optional[str] = None, + import_mode: 
str = "DIRECT_QUERY", + allowed_to_use: Optional[List[str]] = None, + allowed_to_manage: Optional[List[str]] = None, + logical_table_alias: str = "LogicalTable", + rename_columns: Optional[Dict[str, str]] = None, + cast_columns_types: Optional[Dict[str, str]] = None, + tags: Optional[Dict[str, str]] = None, + account_id: Optional[str] = None, + boto3_session: Optional[boto3.Session] = None, +) -> None: + """Create a QuickSight dataset. + + Note + ---- + You will not be able to see the dataset in the console + if you do not pass your user to one of the ``allowed_*`` arguments. + + Note + ---- + You must pass ``database``/``table`` OR ``sql`` argument. + + Note + ---- + You must pass ``data_source_name`` OR ``data_source_arn`` argument. + + Parameters + ---------- + name : str + Dataset name. + database : str + Athena's database name. + table : str + Athena's table name. + sql : str + Use a SQL query to define your table. + sql_name : str + Query name. + data_source_name : str, optional + QuickSight data source name. + data_source_arn : str, optional + QuickSight data source ARN. + import_mode : str + Indicates whether you want to import the data into SPICE. + 'SPICE'|'DIRECT_QUERY' + tags : Dict[str, str], optional + Key/Value collection to put on the dataset. + e.g. {"foo": "boo", "bar": "xoo"}) + allowed_to_use : optional + List of principals that will be allowed to see and use the data source. + e.g. ["john", "Mary"] + allowed_to_manage : optional + List of principals that will be allowed to see, use, update and delete the data source. + e.g. ["Mary"] + logical_table_alias : str + A display name for the logical table. + rename_columns : Dict[str, str], optional + Dictionary to map column renames. e.g. {"old_name": "new_name", "old_name2": "new_name2"} + cast_columns_types : Dict[str, str], optional + Dictionary to map column casts. e.g. 
{"col_name": "STRING", "col_name2": "DECIMAL"} + Valid types: 'STRING'|'INTEGER'|'DECIMAL'|'DATETIME' + account_id : str, optional + If None, the account ID will be inferred from your boto3 session. + boto3_session : boto3.Session(), optional + Boto3 Session. The default boto3 session will be used if boto3_session receive None. + + Returns + ------- + None + None. + + Examples + -------- + >>> import awswrangler as wr + >>> wr.quicksight.create_athena_dataset( + ... name="...", + ... database="..." + ... table="..." + ... data_source_name="..." + ... allowed_to_manage=["Mary"] + ... ) + """ + if (data_source_name is None) and (data_source_arn is None): + raise exceptions.InvalidArgument("You must pass a not None data_source_name or data_source_arn argument.") + if ((database is None) and (table is None)) and (sql is None): + raise exceptions.InvalidArgument("You must pass database/table OR sql argument.") + if (database is not None) and (sql is not None): + raise exceptions.InvalidArgument("If you provide sql argument, please include the database name inside the sql statement. 
Do NOT pass in with database argument.") + session: boto3.Session = _utils.ensure_session(session=boto3_session) + client: boto3.client = _utils.client(service_name="quicksight", session=session) + if account_id is None: + account_id = _utils.get_account_id(boto3_session=session) + if (data_source_arn is None) and (data_source_name is not None): + data_source_arn = get_data_source_arn(name=data_source_name, account_id=account_id, boto3_session=session) + if sql is not None: + physical_table: Dict[str, Dict[str, Any]] = { + "CustomSql": { + "DataSourceArn": data_source_arn, + "Name": sql_name, + "SqlQuery": sql, + "Columns": extract_athena_query_columns( + sql=sql, + data_source_arn=data_source_arn, # type: ignore + account_id=account_id, + boto3_session=session, + ), + } + } + else: + physical_table = { + "RelationalTable": { + "DataSourceArn": data_source_arn, + "Schema": database, + "Name": table, + "InputColumns": extract_athena_table_columns( + database=database, # type: ignore + table=table, # type: ignore + boto3_session=session, + ), + } + } + table_uuid: str = uuid.uuid4().hex + args: Dict[str, Any] = { + "AwsAccountId": account_id, + "DataSetId": name, + "Name": name, + "ImportMode": import_mode, + "PhysicalTableMap": {table_uuid: physical_table}, + "LogicalTableMap": {table_uuid: {"Alias": logical_table_alias, "Source": {"PhysicalTableId": table_uuid}}}, + } + trans: List[Dict[str, Dict[str, Any]]] = _generate_transformations( + rename_columns=rename_columns, cast_columns_types=cast_columns_types + ) + if trans: + args["LogicalTableMap"][table_uuid]["DataTransforms"] = trans + permissions: List[Dict[str, Union[str, List[str]]]] = _generate_permissions( + resource="dataset", + account_id=account_id, + boto3_session=session, + allowed_to_use=allowed_to_use, + allowed_to_manage=allowed_to_manage, + ) + if permissions: + args["Permissions"] = permissions + if tags is not None: + _tags: List[Dict[str, str]] = [{"Key": k, "Value": v} for k, v in tags.items()] + 
args["Tags"] = _tags + client.create_data_set(**args) + + +def create_ingestion( + dataset_name: Optional[str] = None, + dataset_id: Optional[str] = None, + ingestion_id: Optional[str] = None, + account_id: Optional[str] = None, + boto3_session: Optional[boto3.Session] = None, +) -> str: + """Create and starts a new SPICE ingestion on a dataset. + + Note + ---- + You must pass ``dataset_name`` OR ``dataset_id`` argument. + + Parameters + ---------- + dataset_name : str, optional + Dataset name. + dataset_id : str, optional + Dataset ID. + ingestion_id : str, optional + Ingestion ID. + account_id : str, optional + If None, the account ID will be inferred from your boto3 session. + boto3_session : boto3.Session(), optional + Boto3 Session. The default boto3 session will be used if boto3_session receive None. + + Returns + ------- + str + Ingestion ID + + Examples + -------- + >>> import awswrangler as wr + >>> status = wr.quicksight.create_ingestion("my_dataset") + """ + session: boto3.Session = _utils.ensure_session(session=boto3_session) + if account_id is None: + account_id = _utils.get_account_id(boto3_session=session) + if (dataset_name is None) and (dataset_id is None): + raise exceptions.InvalidArgument("You must pass a not None dataset_name or dataset_id argument.") + if (dataset_id is None) and (dataset_name is not None): + dataset_id = get_dataset_id(name=dataset_name, account_id=account_id, boto3_session=session) + if ingestion_id is None: + ingestion_id = uuid.uuid4().hex + client: boto3.client = _utils.client(service_name="quicksight", session=session) + response: Dict[str, Any] = client.create_ingestion( + DataSetId=dataset_id, IngestionId=ingestion_id, AwsAccountId=account_id + ) + return response["IngestionId"] diff --git a/awswrangler/quicksight/_delete.py b/awswrangler/quicksight/_delete.py new file mode 100644 index 000000000..cc45e9108 --- /dev/null +++ b/awswrangler/quicksight/_delete.py @@ -0,0 +1,339 @@ +"""Amazon QuickSight Delete Module.""" + 
+import logging +from typing import Any, Callable, Dict, Optional + +import boto3 # type: ignore + +from awswrangler import _utils, exceptions +from awswrangler.quicksight._get_list import ( + get_dashboard_id, + get_data_source_id, + get_dataset_id, + get_template_id, + list_dashboards, + list_data_sources, + list_datasets, + list_templates, +) + +_logger: logging.Logger = logging.getLogger(__name__) + + +def _delete( + func_name: str, account_id: Optional[str] = None, boto3_session: Optional[boto3.Session] = None, **kwargs +) -> None: + session: boto3.Session = _utils.ensure_session(session=boto3_session) + if account_id is None: + account_id = _utils.get_account_id(boto3_session=session) + client: boto3.client = _utils.client(service_name="quicksight", session=session) + func: Callable = getattr(client, func_name) + func(AwsAccountId=account_id, **kwargs) + + +def delete_dashboard( + name: Optional[str] = None, + dashboard_id: Optional[str] = None, + version_number: Optional[int] = None, + account_id: Optional[str] = None, + boto3_session: Optional[boto3.Session] = None, +) -> None: + """Delete a dashboard. + + Note + ---- + You must pass a not None ``name`` or ``dashboard_id`` argument. + + Parameters + ---------- + name : str, optional + Dashboard name. + dashboard_id : str, optional + The ID for the dashboard. + version_number : int, optional + The version number of the dashboard. If the version number property is provided, + only the specified version of the dashboard is deleted. + account_id : str, optional + If None, the account ID will be inferred from your boto3 session. + boto3_session : boto3.Session(), optional + Boto3 Session. The default boto3 session will be used if boto3_session receive None. + + Returns + ------- + None + None. 
+ + Examples + -------- + >>> import awswrangler as wr + >>> wr.quicksight.delete_dashboard(name="...") + """ + if (name is None) and (dashboard_id is None): + raise exceptions.InvalidArgument("You must pass a not None name or dashboard_id argument.") + session: boto3.Session = _utils.ensure_session(session=boto3_session) + if (dashboard_id is None) and (name is not None): + dashboard_id = get_dashboard_id(name=name, account_id=account_id, boto3_session=session) + args: Dict[str, Any] = { + "func_name": "delete_dashboard", + "account_id": account_id, + "boto3_session": session, + "DashboardId": dashboard_id, + } + if version_number is not None: + args["VersionNumber"] = version_number + _delete(**args) + + +def delete_dataset( + name: Optional[str] = None, + dataset_id: Optional[str] = None, + account_id: Optional[str] = None, + boto3_session: Optional[boto3.Session] = None, +) -> None: + """Delete a dataset. + + Note + ---- + You must pass a not None ``name`` or ``dataset_id`` argument. + + Parameters + ---------- + name : str, optional + Dashboard name. + dataset_id : str, optional + The ID for the dataset. + account_id : str, optional + If None, the account ID will be inferred from your boto3 session. + boto3_session : boto3.Session(), optional + Boto3 Session. The default boto3 session will be used if boto3_session receive None. + + Returns + ------- + None + None. 
+ + Examples + -------- + >>> import awswrangler as wr + >>> wr.quicksight.delete_dataset(name="...") + """ + if (name is None) and (dataset_id is None): + raise exceptions.InvalidArgument("You must pass a not None name or dataset_id argument.") + session: boto3.Session = _utils.ensure_session(session=boto3_session) + if (dataset_id is None) and (name is not None): + dataset_id = get_dataset_id(name=name, account_id=account_id, boto3_session=session) + args: Dict[str, Any] = { + "func_name": "delete_data_set", + "account_id": account_id, + "boto3_session": session, + "DataSetId": dataset_id, + } + _delete(**args) + + +def delete_data_source( + name: Optional[str] = None, + data_source_id: Optional[str] = None, + account_id: Optional[str] = None, + boto3_session: Optional[boto3.Session] = None, +) -> None: + """Delete a data source. + + Note + ---- + You must pass a not None ``name`` or ``data_source_id`` argument. + + Parameters + ---------- + name : str, optional + Dashboard name. + data_source_id : str, optional + The ID for the data source. + account_id : str, optional + If None, the account ID will be inferred from your boto3 session. + boto3_session : boto3.Session(), optional + Boto3 Session. The default boto3 session will be used if boto3_session receive None. + + Returns + ------- + None + None. 
+ + Examples + -------- + >>> import awswrangler as wr + >>> wr.quicksight.delete_data_source(name="...") + """ + if (name is None) and (data_source_id is None): + raise exceptions.InvalidArgument("You must pass a not None name or data_source_id argument.") + session: boto3.Session = _utils.ensure_session(session=boto3_session) + if (data_source_id is None) and (name is not None): + data_source_id = get_data_source_id(name=name, account_id=account_id, boto3_session=session) + args: Dict[str, Any] = { + "func_name": "delete_data_source", + "account_id": account_id, + "boto3_session": session, + "DataSourceId": data_source_id, + } + _delete(**args) + + +def delete_template( + name: Optional[str] = None, + template_id: Optional[str] = None, + version_number: Optional[int] = None, + account_id: Optional[str] = None, + boto3_session: Optional[boto3.Session] = None, +) -> None: + """Delete a template. + + Note + ---- + You must pass a not None ``name`` or ``template_id`` argument. + + Parameters + ---------- + name : str, optional + Template name. + template_id : str, optional + The ID for the template. + version_number : int, optional + Specifies the version of the template that you want to delete. + If you don't provide a version number, it deletes all versions of the template. + account_id : str, optional + If None, the account ID will be inferred from your boto3 session. + boto3_session : boto3.Session(), optional + Boto3 Session. The default boto3 session will be used if boto3_session receive None. + + Returns + ------- + None + None. 
+ + Examples + -------- + >>> import awswrangler as wr + >>> wr.quicksight.delete_template(name="...") + """ + if (name is None) and (template_id is None): + raise exceptions.InvalidArgument("You must pass a not None name or template_id argument.") + session: boto3.Session = _utils.ensure_session(session=boto3_session) + if (template_id is None) and (name is not None): + template_id = get_template_id(name=name, account_id=account_id, boto3_session=session) + args: Dict[str, Any] = { + "func_name": "delete_template", + "account_id": account_id, + "boto3_session": session, + "TemplateId": template_id, + } + if version_number is not None: + args["VersionNumber"] = version_number + _delete(**args) + + +def delete_all_dashboards(account_id: Optional[str] = None, boto3_session: Optional[boto3.Session] = None) -> None: + """Delete all dashboards. + + Parameters + ---------- + account_id : str, optional + If None, the account ID will be inferred from your boto3 session. + boto3_session : boto3.Session(), optional + Boto3 Session. The default boto3 session will be used if boto3_session receive None. + + Returns + ------- + None + None. + + Examples + -------- + >>> import awswrangler as wr + >>> wr.quicksight.delete_all_dashboards() + """ + session: boto3.Session = _utils.ensure_session(session=boto3_session) + if account_id is None: + account_id = _utils.get_account_id(boto3_session=session) + for dashboard in list_dashboards(account_id=account_id, boto3_session=session): + delete_dashboard(dashboard_id=dashboard["DashboardId"], account_id=account_id, boto3_session=session) + + +def delete_all_datasets(account_id: Optional[str] = None, boto3_session: Optional[boto3.Session] = None) -> None: + """Delete all datasets. + + Parameters + ---------- + account_id : str, optional + If None, the account ID will be inferred from your boto3 session. + boto3_session : boto3.Session(), optional + Boto3 Session. The default boto3 session will be used if boto3_session receive None. 
+ + Returns + ------- + None + None. + + Examples + -------- + >>> import awswrangler as wr + >>> wr.quicksight.delete_all_datasets() + """ + session: boto3.Session = _utils.ensure_session(session=boto3_session) + if account_id is None: + account_id = _utils.get_account_id(boto3_session=session) + for dataset in list_datasets(account_id=account_id, boto3_session=session): + delete_dataset(dataset_id=dataset["DataSetId"], account_id=account_id, boto3_session=session) + + +def delete_all_data_sources(account_id: Optional[str] = None, boto3_session: Optional[boto3.Session] = None) -> None: + """Delete all data sources. + + Parameters + ---------- + account_id : str, optional + If None, the account ID will be inferred from your boto3 session. + boto3_session : boto3.Session(), optional + Boto3 Session. The default boto3 session will be used if boto3_session receive None. + + Returns + ------- + None + None. + + Examples + -------- + >>> import awswrangler as wr + >>> wr.quicksight.delete_all_data_sources() + """ + session: boto3.Session = _utils.ensure_session(session=boto3_session) + if account_id is None: + account_id = _utils.get_account_id(boto3_session=session) + for data_source in list_data_sources(account_id=account_id, boto3_session=session): + delete_data_source(data_source_id=data_source["DataSourceId"], account_id=account_id, boto3_session=session) + + +def delete_all_templates(account_id: Optional[str] = None, boto3_session: Optional[boto3.Session] = None) -> None: + """Delete all templates. + + Parameters + ---------- + account_id : str, optional + If None, the account ID will be inferred from your boto3 session. + boto3_session : boto3.Session(), optional + Boto3 Session. The default boto3 session will be used if boto3_session receive None. + + Returns + ------- + None + None. 
+ + Examples + -------- + >>> import awswrangler as wr + >>> wr.quicksight.delete_all_templates() + """ + session: boto3.Session = _utils.ensure_session(session=boto3_session) + if account_id is None: + account_id = _utils.get_account_id(boto3_session=session) + for template in list_templates(account_id=account_id, boto3_session=session): + delete_template(template_id=template["TemplateId"], account_id=account_id, boto3_session=session) diff --git a/awswrangler/quicksight/_describe.py b/awswrangler/quicksight/_describe.py new file mode 100644 index 000000000..d46b2bfb6 --- /dev/null +++ b/awswrangler/quicksight/_describe.py @@ -0,0 +1,236 @@ +"""Amazon QuickSight Describe Module.""" + +import logging +from typing import Any, Dict, Optional + +import boto3 # type: ignore + +from awswrangler import _utils, exceptions +from awswrangler.quicksight._get_list import get_dashboard_id, get_data_source_id, get_dataset_id + +_logger: logging.Logger = logging.getLogger(__name__) + + +def describe_dashboard( + name: Optional[str] = None, + dashboard_id: Optional[str] = None, + account_id: Optional[str] = None, + boto3_session: Optional[boto3.Session] = None, +) -> Dict[str, Any]: + """Describe a QuickSight dashboard by name or ID. + + Note + ---- + You must pass a not None ``name`` or ``dashboard_id`` argument. + + Parameters + ---------- + name : str, optional + Dashboard name. + dashboard_id : str, optional + Dashboard ID. + account_id : str, optional + If None, the account ID will be inferred from your boto3 session. + boto3_session : boto3.Session(), optional + Boto3 Session. The default boto3 session will be used if boto3_session receive None. + + Returns + ------- + Dict[str, Any] + Dashboad Description. 
+ + Examples + -------- + >>> import awswrangler as wr + >>> description = wr.quicksight.describe_dashboard(name="my-dashboard") + """ + if (name is None) and (dashboard_id is None): + raise exceptions.InvalidArgument("You must pass a not None name or dashboard_id argument.") + session: boto3.Session = _utils.ensure_session(session=boto3_session) + if account_id is None: + account_id = _utils.get_account_id(boto3_session=session) + if (dashboard_id is None) and (name is not None): + dashboard_id = get_dashboard_id(name=name, account_id=account_id, boto3_session=session) + client: boto3.client = _utils.client(service_name="quicksight", session=session) + return client.describe_dashboard(AwsAccountId=account_id, DashboardId=dashboard_id)["Dashboard"] + + +def describe_data_source( + name: Optional[str] = None, + data_source_id: Optional[str] = None, + account_id: Optional[str] = None, + boto3_session: Optional[boto3.Session] = None, +) -> Dict[str, Any]: + """Describe a QuickSight data source by name or ID. + + Note + ---- + You must pass a not None ``name`` or ``data_source_id`` argument. + + Parameters + ---------- + name : str, optional + Data source name. + data_source_id : str, optional + Data source ID. + account_id : str, optional + If None, the account ID will be inferred from your boto3 session. + boto3_session : boto3.Session(), optional + Boto3 Session. The default boto3 session will be used if boto3_session receive None. + + Returns + ------- + Dict[str, Any] + Data source Description. 
+ + Examples + -------- + >>> import awswrangler as wr + >>> description = wr.quicksight.describe_data_source("...") + """ + if (name is None) and (data_source_id is None): + raise exceptions.InvalidArgument("You must pass a not None name or data_source_id argument.") + session: boto3.Session = _utils.ensure_session(session=boto3_session) + if account_id is None: + account_id = _utils.get_account_id(boto3_session=session) + if (data_source_id is None) and (name is not None): + data_source_id = get_data_source_id(name=name, account_id=account_id, boto3_session=session) + client: boto3.client = _utils.client(service_name="quicksight", session=session) + return client.describe_data_source(AwsAccountId=account_id, DataSourceId=data_source_id)["DataSource"] + + +def describe_data_source_permissions( + name: Optional[str] = None, + data_source_id: Optional[str] = None, + account_id: Optional[str] = None, + boto3_session: Optional[boto3.Session] = None, +) -> Dict[str, Any]: + """Describe a QuickSight data source permissions by name or ID. + + Note + ---- + You must pass a not None ``name`` or ``data_source_id`` argument. + + Parameters + ---------- + name : str, optional + Data source name. + data_source_id : str, optional + Data source ID. + account_id : str, optional + If None, the account ID will be inferred from your boto3 session. + boto3_session : boto3.Session(), optional + Boto3 Session. The default boto3 session will be used if boto3_session receive None. + + Returns + ------- + Dict[str, Any] + Data source Permissions Description. 
+ + Examples + -------- + >>> import awswrangler as wr + >>> description = wr.quicksight.describe_data_source_permissions("my-data-source") + """ + if (name is None) and (data_source_id is None): + raise exceptions.InvalidArgument("You must pass a not None name or data_source_id argument.") + session: boto3.Session = _utils.ensure_session(session=boto3_session) + if account_id is None: + account_id = _utils.get_account_id(boto3_session=session) + if (data_source_id is None) and (name is not None): + data_source_id = get_data_source_id(name=name, account_id=account_id, boto3_session=session) + client: boto3.client = _utils.client(service_name="quicksight", session=session) + return client.describe_data_source_permissions(AwsAccountId=account_id, DataSourceId=data_source_id)["Permissions"] + + +def describe_dataset( + name: Optional[str] = None, + dataset_id: Optional[str] = None, + account_id: Optional[str] = None, + boto3_session: Optional[boto3.Session] = None, +) -> Dict[str, Any]: + """Describe a QuickSight dataset by name or ID. + + Note + ---- + You must pass a not None ``name`` or ``dataset_id`` argument. + + Parameters + ---------- + name : str, optional + Dataset name. + dataset_id : str, optional + Dataset ID. + account_id : str, optional + If None, the account ID will be inferred from your boto3 session. + boto3_session : boto3.Session(), optional + Boto3 Session. The default boto3 session will be used if boto3_session receive None. + + Returns + ------- + Dict[str, Any] + Dataset Description. 
+
+    Examples
+    --------
+    >>> import awswrangler as wr
+    >>> description = wr.quicksight.describe_dataset("my-dataset")
+    """
+    if (name is None) and (dataset_id is None):
+        raise exceptions.InvalidArgument("You must pass a not None name or dataset_id argument.")
+    session: boto3.Session = _utils.ensure_session(session=boto3_session)
+    if account_id is None:
+        account_id = _utils.get_account_id(boto3_session=session)
+    if (dataset_id is None) and (name is not None):
+        dataset_id = get_dataset_id(name=name, account_id=account_id, boto3_session=session)
+    client: boto3.client = _utils.client(service_name="quicksight", session=session)
+    return client.describe_data_set(AwsAccountId=account_id, DataSetId=dataset_id)["DataSet"]
+
+
+def describe_ingestion(
+    ingestion_id: Optional[str] = None,
+    dataset_name: Optional[str] = None,
+    dataset_id: Optional[str] = None,
+    account_id: Optional[str] = None,
+    boto3_session: Optional[boto3.Session] = None,
+) -> Dict[str, Any]:
+    """Describe a QuickSight ingestion by ID.
+
+    Note
+    ----
+    You must pass a not None value for ``dataset_name`` or ``dataset_id`` argument.
+
+    Parameters
+    ----------
+    ingestion_id : str
+        Ingestion ID.
+    dataset_name : str, optional
+        Dataset name.
+    dataset_id : str, optional
+        Dataset ID.
+    account_id : str, optional
+        If None, the account ID will be inferred from your boto3 session.
+    boto3_session : boto3.Session(), optional
+        Boto3 Session. The default boto3 session will be used if boto3_session receive None.
+
+    Returns
+    -------
+    Dict[str, Any]
+        Ingestion Description.
+
+    Examples
+    --------
+    >>> import awswrangler as wr
+    >>> description = wr.quicksight.describe_ingestion(ingestion_id="...", dataset_name="...")
+    """
+    if (dataset_name is None) and (dataset_id is None):
+        raise exceptions.InvalidArgument("You must pass a not None name or dataset_id argument.")
+    session: boto3.Session = _utils.ensure_session(session=boto3_session)
+    if account_id is None:
+        account_id = _utils.get_account_id(boto3_session=session)
+    if (dataset_id is None) and (dataset_name is not None):
+        dataset_id = get_dataset_id(name=dataset_name, account_id=account_id, boto3_session=session)
+    client: boto3.client = _utils.client(service_name="quicksight", session=session)
+    return client.describe_ingestion(IngestionId=ingestion_id, AwsAccountId=account_id, DataSetId=dataset_id)[
+        "Ingestion"
+    ]
diff --git a/awswrangler/quicksight/_get_list.py b/awswrangler/quicksight/_get_list.py
new file mode 100644
index 000000000..98035e26e
--- /dev/null
+++ b/awswrangler/quicksight/_get_list.py
@@ -0,0 +1,778 @@
+"""
+Amazon QuickSight List and Get Module.
+
+List and Get MUST be together to avoid circular dependency.
+""" + +import logging +from typing import Any, Callable, Dict, List, Optional + +import boto3 # type: ignore + +from awswrangler import _utils, exceptions + +_logger: logging.Logger = logging.getLogger(__name__) + + +def _list( + func_name: str, + attr_name: str, + account_id: Optional[str] = None, + boto3_session: Optional[boto3.Session] = None, + **kwargs, +) -> List[Dict[str, Any]]: + session: boto3.Session = _utils.ensure_session(session=boto3_session) + if account_id is None: + account_id = _utils.get_account_id(boto3_session=session) + client: boto3.client = _utils.client(service_name="quicksight", session=session) + func: Callable = getattr(client, func_name) + response = func(AwsAccountId=account_id, **kwargs) + next_token: str = response.get("NextToken", None) + result: List[Dict[str, Any]] = response[attr_name] + while next_token is not None: + response = func(AwsAccountId=account_id, NextToken=next_token, **kwargs) + next_token = response.get("NextToken", None) + result += response[attr_name] + return result + + +def list_dashboards( + account_id: Optional[str] = None, boto3_session: Optional[boto3.Session] = None +) -> List[Dict[str, Any]]: + """List dashboards in an AWS account. + + Parameters + ---------- + account_id : str, optional + If None, the account ID will be inferred from your boto3 session. + boto3_session : boto3.Session(), optional + Boto3 Session. The default boto3 session will be used if boto3_session receive None. + + Returns + ------- + List[Dict[str, Any]] + Dashboards. + + Examples + -------- + >>> import awswrangler as wr + >>> dashboards = wr.quicksight.list_dashboards() + """ + return _list( + func_name="list_dashboards", + attr_name="DashboardSummaryList", + account_id=account_id, + boto3_session=boto3_session, + ) + + +def list_datasets( + account_id: Optional[str] = None, boto3_session: Optional[boto3.Session] = None +) -> List[Dict[str, Any]]: + """List all QuickSight datasets summaries. 
+ + Parameters + ---------- + account_id : str, optional + If None, the account ID will be inferred from your boto3 session. + boto3_session : boto3.Session(), optional + Boto3 Session. The default boto3 session will be used if boto3_session receive None. + + Returns + ------- + List[Dict[str, Any]] + Datasets summaries. + + Examples + -------- + >>> import awswrangler as wr + >>> datasets = wr.quicksight.list_datasets() + """ + return _list( + func_name="list_data_sets", attr_name="DataSetSummaries", account_id=account_id, boto3_session=boto3_session + ) + + +def list_data_sources( + account_id: Optional[str] = None, boto3_session: Optional[boto3.Session] = None +) -> List[Dict[str, Any]]: + """List all QuickSight Data sources summaries. + + Parameters + ---------- + account_id : str, optional + If None, the account ID will be inferred from your boto3 session. + boto3_session : boto3.Session(), optional + Boto3 Session. The default boto3 session will be used if boto3_session receive None. + + Returns + ------- + List[Dict[str, Any]] + Data sources summaries. + + Examples + -------- + >>> import awswrangler as wr + >>> sources = wr.quicksight.list_data_sources() + """ + return _list( + func_name="list_data_sources", attr_name="DataSources", account_id=account_id, boto3_session=boto3_session + ) + + +def list_templates( + account_id: Optional[str] = None, boto3_session: Optional[boto3.Session] = None +) -> List[Dict[str, Any]]: + """List all QuickSight templates. + + Parameters + ---------- + account_id : str, optional + If None, the account ID will be inferred from your boto3 session. + boto3_session : boto3.Session(), optional + Boto3 Session. The default boto3 session will be used if boto3_session receive None. + + Returns + ------- + List[Dict[str, Any]] + Templates summaries. 
+ + Examples + -------- + >>> import awswrangler as wr + >>> templates = wr.quicksight.list_templates() + """ + return _list( + func_name="list_templates", attr_name="TemplateSummaryList", account_id=account_id, boto3_session=boto3_session + ) + + +def list_group_memberships( + group_name: str, + namespace: str = "default", + account_id: Optional[str] = None, + boto3_session: Optional[boto3.Session] = None, +) -> List[Dict[str, Any]]: + """List all QuickSight Group memberships. + + Parameters + ---------- + group_name : str + The name of the group that you want to see a membership list of. + namespace : str + The namespace. Currently, you should set this to default . + account_id : str, optional + If None, the account ID will be inferred from your boto3 session. + boto3_session : boto3.Session(), optional + Boto3 Session. The default boto3 session will be used if boto3_session receive None. + + Returns + ------- + List[Dict[str, Any]] + Group memberships. + + Examples + -------- + >>> import awswrangler as wr + >>> memberships = wr.quicksight.list_group_memberships() + """ + return _list( + func_name="list_group_memberships", + attr_name="GroupMemberList", + account_id=account_id, + boto3_session=boto3_session, + GroupName=group_name, + Namespace=namespace, + ) + + +def list_groups( + namespace: str = "default", account_id: Optional[str] = None, boto3_session: Optional[boto3.Session] = None +) -> List[Dict[str, Any]]: + """List all QuickSight Groups. + + Parameters + ---------- + namespace : str + The namespace. Currently, you should set this to default . + account_id : str, optional + If None, the account ID will be inferred from your boto3 session. + boto3_session : boto3.Session(), optional + Boto3 Session. The default boto3 session will be used if boto3_session receive None. + + Returns + ------- + List[Dict[str, Any]] + Groups. 
+ + Examples + -------- + >>> import awswrangler as wr + >>> groups = wr.quicksight.list_groups() + """ + return _list( + func_name="list_groups", + attr_name="GroupList", + account_id=account_id, + boto3_session=boto3_session, + Namespace=namespace, + ) + + +def list_iam_policy_assignments( + status: Optional[str] = None, + namespace: str = "default", + account_id: Optional[str] = None, + boto3_session: Optional[boto3.Session] = None, +) -> List[Dict[str, Any]]: + """List IAM policy assignments in the current Amazon QuickSight account. + + Parameters + ---------- + status : str, optional + The status of the assignments. + 'ENABLED'|'DRAFT'|'DISABLED' + namespace : str + The namespace. Currently, you should set this to default . + account_id : str, optional + If None, the account ID will be inferred from your boto3 session. + boto3_session : boto3.Session(), optional + Boto3 Session. The default boto3 session will be used if boto3_session receive None. + + Returns + ------- + List[Dict[str, Any]] + IAM policy assignments. + + Examples + -------- + >>> import awswrangler as wr + >>> assigns = wr.quicksight.list_iam_policy_assignments() + """ + args: Dict[str, Any] = { + "func_name": "list_iam_policy_assignments", + "attr_name": "IAMPolicyAssignments", + "account_id": account_id, + "boto3_session": boto3_session, + "Namespace": namespace, + } + if status is not None: + args["AssignmentStatus"] = status + return _list(**args) + + +def list_iam_policy_assignments_for_user( + user_name: str, + namespace: str = "default", + account_id: Optional[str] = None, + boto3_session: Optional[boto3.Session] = None, +) -> List[Dict[str, Any]]: + """List all the IAM policy assignments. + + Including the Amazon Resource Names (ARNs) for the IAM policies assigned + to the specified user and group or groups that the user belongs to. + + Parameters + ---------- + user_name : str + The name of the user. + namespace : str + The namespace. Currently, you should set this to default . 
+ account_id : str, optional + If None, the account ID will be inferred from your boto3 session. + boto3_session : boto3.Session(), optional + Boto3 Session. The default boto3 session will be used if boto3_session receive None. + + Returns + ------- + List[Dict[str, Any]] + IAM policy assignments. + + Examples + -------- + >>> import awswrangler as wr + >>> assigns = wr.quicksight.list_iam_policy_assignments_for_user() + """ + return _list( + func_name="list_iam_policy_assignments_for_user", + attr_name="ActiveAssignments", + account_id=account_id, + boto3_session=boto3_session, + UserName=user_name, + Namespace=namespace, + ) + + +def list_user_groups( + user_name: str, + namespace: str = "default", + account_id: Optional[str] = None, + boto3_session: Optional[boto3.Session] = None, +) -> List[Dict[str, Any]]: + """List the Amazon QuickSight groups that an Amazon QuickSight user is a member of. + + Parameters + ---------- + user_name: str: + The Amazon QuickSight user name that you want to list group memberships for. + namespace : str + The namespace. Currently, you should set this to default . + account_id : str, optional + If None, the account ID will be inferred from your boto3 session. + boto3_session : boto3.Session(), optional + Boto3 Session. The default boto3 session will be used if boto3_session receive None. + + Returns + ------- + List[Dict[str, Any]] + Groups. + + Examples + -------- + >>> import awswrangler as wr + >>> groups = wr.quicksight.list_user_groups() + """ + return _list( + func_name="list_user_groups", + attr_name="GroupList", + account_id=account_id, + boto3_session=boto3_session, + UserName=user_name, + Namespace=namespace, + ) + + +def list_users( + namespace: str = "default", account_id: Optional[str] = None, boto3_session: Optional[boto3.Session] = None +) -> List[Dict[str, Any]]: + """Return a list of all of the Amazon QuickSight users belonging to this account. + + Parameters + ---------- + namespace : str + The namespace. 
Currently, you should set this to default . + account_id : str, optional + If None, the account ID will be inferred from your boto3 session. + boto3_session : boto3.Session(), optional + Boto3 Session. The default boto3 session will be used if boto3_session receive None. + + Returns + ------- + List[Dict[str, Any]] + Groups. + + Examples + -------- + >>> import awswrangler as wr + >>> users = wr.quicksight.list_users() + """ + return _list( + func_name="list_users", + attr_name="UserList", + account_id=account_id, + boto3_session=boto3_session, + Namespace=namespace, + ) + + +def list_ingestions( + dataset_name: Optional[str] = None, + dataset_id: Optional[str] = None, + account_id: Optional[str] = None, + boto3_session: Optional[boto3.Session] = None, +) -> List[Dict[str, Any]]: + """List the history of SPICE ingestions for a dataset. + + Parameters + ---------- + dataset_name : str, optional + Dataset name. + dataset_id : str, optional + The ID of the dataset used in the ingestion. + account_id : str, optional + If None, the account ID will be inferred from your boto3 session. + boto3_session : boto3.Session(), optional + Boto3 Session. The default boto3 session will be used if boto3_session receive None. + + Returns + ------- + List[Dict[str, Any]] + IAM policy assignments. 
+ + Examples + -------- + >>> import awswrangler as wr + >>> ingestions = wr.quicksight.list_ingestions() + """ + if (dataset_name is None) and (dataset_id is None): + raise exceptions.InvalidArgument("You must pass a not None name or dataset_id argument.") + session: boto3.Session = _utils.ensure_session(session=boto3_session) + if account_id is None: + account_id = _utils.get_account_id(boto3_session=session) + if (dataset_id is None) and (dataset_name is not None): + dataset_id = get_dataset_id(name=dataset_name, account_id=account_id, boto3_session=session) + return _list( + func_name="list_ingestions", + attr_name="Ingestions", + account_id=account_id, + boto3_session=boto3_session, + DataSetId=dataset_id, + ) + + +def _get_ids( + name: str, + func: Callable, + attr_name: str, + account_id: Optional[str] = None, + boto3_session: Optional[boto3.Session] = None, +) -> List[str]: + ids: List[str] = [] + for item in func(account_id=account_id, boto3_session=boto3_session): + if item["Name"] == name: + ids.append(item[attr_name]) + return ids + + +def _get_id( + name: str, + func: Callable, + attr_name: str, + account_id: Optional[str] = None, + boto3_session: Optional[boto3.Session] = None, +) -> str: + ids: List[str] = _get_ids( + name=name, func=func, attr_name=attr_name, account_id=account_id, boto3_session=boto3_session + ) + if len(ids) == 0: + raise exceptions.InvalidArgument(f"There is no {attr_name} related with name {name}") + if len(ids) > 1: + raise exceptions.InvalidArgument( + f"There is {len(ids)} {attr_name} with name {name}. " + f"Please pass the id argument to specify " + f"which one you would like to describe." + ) + return ids[0] + + +def get_dashboard_ids( + name: str, account_id: Optional[str] = None, boto3_session: Optional[boto3.Session] = None +) -> List[str]: + """Get QuickSight dashboard IDs given a name. 
+ + Note + ---- + This function returns a list of ID because Quicksight accepts duplicated dashboard names, + so you may have more than 1 ID for a given name. + + Parameters + ---------- + name : str + Dashboard name. + account_id : str, optional + If None, the account ID will be inferred from your boto3 session. + boto3_session : boto3.Session(), optional + Boto3 Session. The default boto3 session will be used if boto3_session receive None. + + Returns + ------- + List[str] + Dashboad IDs. + + Examples + -------- + >>> import awswrangler as wr + >>> ids = wr.quicksight.get_dashboard_ids(name="...") + """ + return _get_ids( + name=name, func=list_dashboards, attr_name="DashboardId", account_id=account_id, boto3_session=boto3_session + ) + + +def get_dashboard_id(name: str, account_id: Optional[str] = None, boto3_session: Optional[boto3.Session] = None) -> str: + """Get QuickSight dashboard ID given a name and fails if there is more than 1 ID associated with this name. + + Parameters + ---------- + name : str + Dashboard name. + account_id : str, optional + If None, the account ID will be inferred from your boto3 session. + boto3_session : boto3.Session(), optional + Boto3 Session. The default boto3 session will be used if boto3_session receive None. + + Returns + ------- + str + Dashboad ID. + + Examples + -------- + >>> import awswrangler as wr + >>> my_id = wr.quicksight.get_dashboard_id(name="...") + """ + return _get_id( + name=name, func=list_dashboards, attr_name="DashboardId", account_id=account_id, boto3_session=boto3_session + ) + + +def get_dataset_ids( + name: str, account_id: Optional[str] = None, boto3_session: Optional[boto3.Session] = None +) -> List[str]: + """Get QuickSight dataset IDs given a name. + + Note + ---- + This function returns a list of ID because Quicksight accepts duplicated datasets names, + so you may have more than 1 ID for a given name. + + Parameters + ---------- + name : str + Dataset name. 
+ account_id : str, optional + If None, the account ID will be inferred from your boto3 session. + boto3_session : boto3.Session(), optional + Boto3 Session. The default boto3 session will be used if boto3_session receive None. + + Returns + ------- + List[str] + Datasets IDs. + + Examples + -------- + >>> import awswrangler as wr + >>> ids = wr.quicksight.get_dataset_ids(name="...") + """ + return _get_ids( + name=name, func=list_datasets, attr_name="DataSetId", account_id=account_id, boto3_session=boto3_session + ) + + +def get_dataset_id(name: str, account_id: Optional[str] = None, boto3_session: Optional[boto3.Session] = None) -> str: + """Get QuickSight Dataset ID given a name and fails if there is more than 1 ID associated with this name. + + Parameters + ---------- + name : str + Dataset name. + account_id : str, optional + If None, the account ID will be inferred from your boto3 session. + boto3_session : boto3.Session(), optional + Boto3 Session. The default boto3 session will be used if boto3_session receive None. + + Returns + ------- + str + Dataset ID. + + Examples + -------- + >>> import awswrangler as wr + >>> my_id = wr.quicksight.get_dataset_id(name="...") + """ + return _get_id( + name=name, func=list_datasets, attr_name="DataSetId", account_id=account_id, boto3_session=boto3_session + ) + + +def get_data_source_ids( + name: str, account_id: Optional[str] = None, boto3_session: Optional[boto3.Session] = None +) -> List[str]: + """Get QuickSight data source IDs given a name. + + Note + ---- + This function returns a list of ID because Quicksight accepts duplicated data source names, + so you may have more than 1 ID for a given name. + + Parameters + ---------- + name : str + Data source name. + account_id : str, optional + If None, the account ID will be inferred from your boto3 session. + boto3_session : boto3.Session(), optional + Boto3 Session. The default boto3 session will be used if boto3_session receive None. 
+
+    Returns
+    -------
+    List[str]
+        Data source IDs.
+
+    Examples
+    --------
+    >>> import awswrangler as wr
+    >>> ids = wr.quicksight.get_data_source_ids(name="...")
+    """
+    return _get_ids(
+        name=name, func=list_data_sources, attr_name="DataSourceId", account_id=account_id, boto3_session=boto3_session
+    )
+
+
+def get_data_source_id(
+    name: str, account_id: Optional[str] = None, boto3_session: Optional[boto3.Session] = None
+) -> str:
+    """Get QuickSight data source ID given a name and fails if there is more than 1 ID associated with this name.
+
+    Parameters
+    ----------
+    name : str
+        Data source name.
+    account_id : str, optional
+        If None, the account ID will be inferred from your boto3 session.
+    boto3_session : boto3.Session(), optional
+        Boto3 Session. The default boto3 session will be used if boto3_session receive None.
+
+    Returns
+    -------
+    str
+        Data source ID.
+
+    Examples
+    --------
+    >>> import awswrangler as wr
+    >>> my_id = wr.quicksight.get_data_source_id(name="...")
+    """
+    return _get_id(
+        name=name, func=list_data_sources, attr_name="DataSourceId", account_id=account_id, boto3_session=boto3_session
+    )
+
+
+def get_template_ids(
+    name: str, account_id: Optional[str] = None, boto3_session: Optional[boto3.Session] = None
+) -> List[str]:
+    """Get QuickSight template IDs given a name.
+
+    Note
+    ----
+    This function returns a list of ID because Quicksight accepts duplicated templates names,
+    so you may have more than 1 ID for a given name.
+
+    Parameters
+    ----------
+    name : str
+        Template name.
+    account_id : str, optional
+        If None, the account ID will be inferred from your boto3 session.
+    boto3_session : boto3.Session(), optional
+        Boto3 Session. The default boto3 session will be used if boto3_session receive None.
+
+    Returns
+    -------
+    List[str]
+        Template IDs.
+ + Examples + -------- + >>> import awswrangler as wr + >>> ids = wr.quicksight.get_template_ids(name="...") + """ + return _get_ids( + name=name, func=list_templates, attr_name="TemplateId", account_id=account_id, boto3_session=boto3_session + ) + + +def get_template_id(name: str, account_id: Optional[str] = None, boto3_session: Optional[boto3.Session] = None) -> str: + """Get QuickSight template ID given a name and fails if there is more than 1 ID associated with this name. + + Parameters + ---------- + name : str + Template name. + account_id : str, optional + If None, the account ID will be inferred from your boto3 session. + boto3_session : boto3.Session(), optional + Boto3 Session. The default boto3 session will be used if boto3_session receive None. + + Returns + ------- + str + Template ID. + + Examples + -------- + >>> import awswrangler as wr + >>> my_id = wr.quicksight.get_template_id(name="...") + """ + return _get_id( + name=name, func=list_templates, attr_name="TemplateId", account_id=account_id, boto3_session=boto3_session + ) + + +def get_data_source_arns( + name: str, account_id: Optional[str] = None, boto3_session: Optional[boto3.Session] = None +) -> List[str]: + """Get QuickSight Data source ARNs given a name. + + Note + ---- + This function returns a list of ARNs because Quicksight accepts duplicated data source names, + so you may have more than 1 ARN for a given name. + + Parameters + ---------- + name : str + Data source name. + account_id : str, optional + If None, the account ID will be inferred from your boto3 session. + boto3_session : boto3.Session(), optional + Boto3 Session. The default boto3 session will be used if boto3_session receive None. + + Returns + ------- + List[str] + Data source ARNs. 
+ + Examples + -------- + >>> import awswrangler as wr + >>> arns = wr.quicksight.get_data_source_arns(name="...") + """ + arns: List[str] = [] + for source in list_data_sources(account_id=account_id, boto3_session=boto3_session): + if source["Name"] == name: + arns.append(source["Arn"]) + return arns + + +def get_data_source_arn( + name: str, account_id: Optional[str] = None, boto3_session: Optional[boto3.Session] = None +) -> str: + """Get QuickSight data source ARN given a name and fails if there is more than 1 ARN associated with this name. + + Note + ---- + This function returns a list of ARNs because Quicksight accepts duplicated data source names, + so you may have more than 1 ARN for a given name. + + Parameters + ---------- + name : str + Data source name. + account_id : str, optional + If None, the account ID will be inferred from your boto3 session. + boto3_session : boto3.Session(), optional + Boto3 Session. The default boto3 session will be used if boto3_session receive None. + + Returns + ------- + str + Data source ARN. + + Examples + -------- + >>> import awswrangler as wr + >>> arn = wr.quicksight.get_data_source_arn("...") + """ + arns: List[str] = get_data_source_arns(name=name, account_id=account_id, boto3_session=boto3_session) + if len(arns) == 0: + raise exceptions.InvalidArgument(f"There is not data source with name {name}") + if len(arns) > 1: + raise exceptions.InvalidArgument( + f"There is more than 1 data source with name {name}. " + f"Please pass the data_source_arn argument to specify " + f"which one you would like to describe." 
+ ) + return arns[0] diff --git a/awswrangler/quicksight/_utils.py b/awswrangler/quicksight/_utils.py new file mode 100644 index 000000000..957cf9f53 --- /dev/null +++ b/awswrangler/quicksight/_utils.py @@ -0,0 +1,35 @@ +"""Internal (private) Amazon QuickSight Utilities Module.""" + +import logging +from typing import Any, Dict, List, Optional + +import boto3 # type: ignore + +from awswrangler import _data_types, athena, catalog, exceptions +from awswrangler.quicksight._get_list import list_data_sources + +_logger: logging.Logger = logging.getLogger(__name__) + + +def extract_athena_table_columns(database: str, table: str, boto3_session: boto3.Session) -> List[Dict[str, str]]: + """Extract athena columns data types from table and raising an exception if not exist.""" + dtypes: Optional[Dict[str, str]] = catalog.get_table_types( + database=database, table=table, boto3_session=boto3_session + ) + if dtypes is None: + raise exceptions.InvalidArgument(f"{database}.{table} does not exist on Athena.") + return [{"Name": name, "Type": _data_types.athena2quicksight(dtype=dtype)} for name, dtype in dtypes.items()] + + +def extract_athena_query_columns( + sql: str, data_source_arn: str, account_id: str, boto3_session: boto3.Session +) -> List[Dict[str, str]]: + """Extract athena columns data types from a SQL query.""" + data_sources: List[Dict[str, Any]] = list_data_sources(account_id=account_id, boto3_session=boto3_session) + data_source: Dict[str, Any] = [x for x in data_sources if x["Arn"] == data_source_arn][0] + workgroup: str = data_source["DataSourceParameters"]["AthenaParameters"]["WorkGroup"] + sql_wrapped: str = f"/* QuickSight */\nSELECT ds.* FROM ( {sql} ) ds LIMIT 0" + query_id: str = athena.start_query_execution(sql=sql_wrapped, workgroup=workgroup, boto3_session=boto3_session) + athena.wait_query(query_execution_id=query_id, boto3_session=boto3_session) + dtypes: Dict[str, str] = athena.get_query_columns_types(query_execution_id=query_id, 
boto3_session=boto3_session) + return [{"Name": name, "Type": _data_types.athena2quicksight(dtype=dtype)} for name, dtype in dtypes.items()] diff --git a/docs/source/api.rst b/docs/source/api.rst index 5cd8e9e3c..a61526683 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -42,8 +42,10 @@ AWS Glue Catalog add_csv_partitions add_parquet_partitions create_csv_table + create_database create_parquet_table databases + delete_database delete_table_if_exists does_table_exist drop_duplicated_columns @@ -135,3 +137,49 @@ CloudWatch Logs run_query start_query wait_query + +Amazon QuickSight +----------------- + +.. currentmodule:: awswrangler.quicksight + +.. autosummary:: + :toctree: stubs + + cancel_ingestion + create_athena_data_source + create_athena_dataset + create_ingestion + delete_all_dashboards + delete_all_data_sources + delete_all_datasets + delete_all_templates + delete_dashboard + delete_data_source + delete_dataset + delete_template + describe_dashboard + describe_data_source + describe_data_source_permissions + describe_dataset + describe_ingestion + get_dashboard_id + get_dashboard_ids + get_data_source_arn + get_data_source_arns + get_data_source_id + get_data_source_ids + get_dataset_id + get_dataset_ids + get_template_id + get_template_ids + list_dashboards + list_data_sources + list_datasets + list_groups + list_group_memberships + list_iam_policy_assignments + list_iam_policy_assignments_for_user + list_ingestions + list_templates + list_users diff --git a/docs/source/index.rst b/docs/source/index.rst index 6c0380007..2335c0209 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -26,11 +26,30 @@ Quick Start df = wr.athena.read_sql_query("SELECT * FROM my_table", database="my_db") # Getting Redshift connection (SQLAlchemy) from Glue Catalog Connections - engine = wr.catalog.get_engine("my-redshift-connection") - # Retrieving the data from Amazon Redshift Spectrum + engine = wr.catalog.get_engine("my-redshift-connection") df = 
wr.db.read_sql_query("SELECT * FROM external_schema.my_table", con=engine) + # Creating QuickSight Data Source and Dataset to reflect our new table + wr.quicksight.create_athena_data_source("athena-source", allowed_to_manage=["username"]) + wr.quicksight.create_athena_dataset( + name="my-dataset", + database="my_db", + table="my_table", + data_source_name="athena-source", + allowed_to_manage=["username"] + ) + + # Getting MySQL connection (SQLAlchemy) from Glue Catalog Connections + # Load the data into MySQL + engine = wr.catalog.get_engine("my-mysql-connection") + wr.db.to_sql(df, engine, schema="test", name="my_table") + + # Getting PostgreSQL connection (SQLAlchemy) from Glue Catalog Connections + # Load the data into PostgreSQL + engine = wr.catalog.get_engine("my-postgresql-connection") + wr.db.to_sql(df, engine, schema="test", name="my_table") + Read The Docs ------------- diff --git a/docs/source/what.rst b/docs/source/what.rst index 0a169b74d..71c721782 100644 --- a/docs/source/what.rst +++ b/docs/source/what.rst @@ -1,7 +1,7 @@ What is AWS Data Wrangler? ========================== -An `open-source `_ Python package that extends the power of `Pandas `_ library to AWS connecting **DataFrames** and AWS data related services (**Amazon Redshift**, **AWS Glue**, **Amazon Athena**, **Amazon EMR**, etc). +An `open-source `_ Python package that extends the power of `Pandas `_ library to AWS connecting **DataFrames** and AWS data related services (**Amazon Redshift**, **AWS Glue**, **Amazon Athena**, **Amazon EMR**, **Amazon QuickSight**, etc). Built on top of other open-source projects like `Pandas `_, `Apache Arrow `_, `Boto3 `_, `s3fs `_, `SQLAlchemy `_, `Psycopg2 `_ and `PyMySQL `_, it offers abstracted functions to execute usual ETL tasks like load/unload data from **Data Lakes**, **Data Warehouses** and **Databases**. 
diff --git a/requirements.txt b/requirements.txt index 6ee02a1fb..273fff794 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,4 +7,4 @@ s3fs~=0.4.2 psycopg2-binary~=2.8.0 pymysql~=0.9.0 sqlalchemy-redshift~=0.7.0 -SQLAlchemy~=1.3.10 +SQLAlchemy>=1.3.10,<1.3.14 diff --git a/tutorials/002 - Sessions.ipynb b/tutorials/002 - Sessions.ipynb index 2ff88ad1a..b305ed429 100644 --- a/tutorials/002 - Sessions.ipynb +++ b/tutorials/002 - Sessions.ipynb @@ -124,28 +124,6 @@ "\n", "wr.s3.does_object_exist(\"s3://noaa-ghcn-pds/fake\", boto3_session=my_session)" ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "False" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "my_session = boto3.Session(region_name=\"us-east-2\")\n", - "\n", - "wr.s3.does_object_exist(\"s3://noaa-ghcn-pds/fake\", boto3_session=my_session)" - ] } ], "metadata": { diff --git a/tutorials/018 - QuickSight.ipynb b/tutorials/018 - QuickSight.ipynb new file mode 100644 index 000000000..a90fe6573 --- /dev/null +++ b/tutorials/018 - QuickSight.ipynb @@ -0,0 +1,1298 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[![AWS Data Wrangler](_static/logo.png \"AWS Data Wrangler\")](https://github.com/awslabs/aws-data-wrangler)\n", + "\n", + "# 18 - QuickSight\n", + "\n", + "For this tutorial we will use the public AWS COVID-19 data lake.\n", + "\n", + "References:\n", + "\n", + "* [A public data lake for analysis of COVID-19 data](https://aws.amazon.com/blogs/big-data/a-public-data-lake-for-analysis-of-covid-19-data/)\n", + "* [Exploring the public AWS COVID-19 data lake](https://aws.amazon.com/blogs/big-data/exploring-the-public-aws-covid-19-data-lake/)\n", + "* [CloudFormation template](https://covid19-lake.s3.us-east-2.amazonaws.com/cfn/CovidLakeStack.template.json)\n", + "\n", + "*Please, install the Cloudformation template above to have 
access to the public data lake.*\n", + "\n", + "*P.S. To be able to access the public data lake, you must allow explicitly QuickSight to access the related external bucket.*" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "pycharm": { + "is_executing": false + } + }, + "outputs": [], + "source": [ + "import awswrangler as wr\n", + "from time import sleep" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "List users of QuickSight account" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "pycharm": { + "is_executing": false + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'username': 'dev', 'role': 'ADMIN'}]" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "[{\"username\": user[\"UserName\"], \"role\": user[\"Role\"]} for user in wr.quicksight.list_users('default')]" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "pycharm": { + "is_executing": false + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
DatabaseDescription
0aws_data_wranglerAWS Data Wrangler Test Arena - Glue Database
1awswrangler_test
2covid-19
3defaultDefault Hive database
\n", + "
" + ], + "text/plain": [ + " Database Description\n", + "0 aws_data_wrangler AWS Data Wrangler Test Arena - Glue Database\n", + "1 awswrangler_test \n", + "2 covid-19 \n", + "3 default Default Hive database" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "wr.catalog.databases()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "pycharm": { + "is_executing": false + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + "
DatabaseTableDescriptionColumnsPartitions
0covid-19alleninstitute_comprehend_medicalComprehend Medical results run against Allen I...paper_id, date, dx_name, test_name, procedure_...
1covid-19alleninstitute_metadataMetadata on papers pulled from the Allen Insti...cord_uid, sha, source_x, title, doi, pmcid, pu...
2covid-19country_codesLookup table for country codescountry, alpha-2 code, alpha-3 code, numeric c...
3covid-19county_populationsLookup table for population for each county ba...id, id2, county, state, population estimate 2018
4covid-19covid_knowledge_graph_edgesAWS Knowledge Graph for COVID-19 dataid, label, from, to, score
5covid-19covid_knowledge_graph_nodes_authorAWS Knowledge Graph for COVID-19 dataid, label, first, last, full_name
6covid-19covid_knowledge_graph_nodes_conceptAWS Knowledge Graph for COVID-19 dataid, label, entity, concept
7covid-19covid_knowledge_graph_nodes_institutionAWS Knowledge Graph for COVID-19 dataid, label, institution, country, settlement
8covid-19covid_knowledge_graph_nodes_paperAWS Knowledge Graph for COVID-19 dataid, label, doi, sha_code, publish_time, source...
9covid-19covid_knowledge_graph_nodes_topicAWS Knowledge Graph for COVID-19 dataid, label, topic, topic_num
10covid-19covid_testing_states_dailyUSA total test daily trend by state. Sourced ...date, state, positive, negative, pending, hosp...
11covid-19covid_testing_us_dailyUSA total test daily trend. Sourced from covi...date, states, positive, negative, posneg, pend...
12covid-19covid_testing_us_totalUSA total tests. Sourced from covidtracking.c...positive, negative, posneg, hospitalized, deat...
13covid-19covidcast_dataCMU Delphi's COVID-19 Surveillance Datadata_source, signal, geo_type, time_value, geo...
14covid-19covidcast_metadataCMU Delphi's COVID-19 Surveillance Metadatadata_source, signal, time_type, geo_type, min_...
15covid-19enigma_jhuJohns Hopkins University Consolidated data on ...fips, admin2, province_state, country_region, ...
16covid-19enigma_jhu_timeseriesJohns Hopkins University data on COVID-19 case...uid, fips, iso2, iso3, code3, admin2, latitude...
17covid-19hospital_bedsData on hospital beds and their utilization in...objectid, hospital_name, hospital_type, hq_add...
18covid-19nytimes_countiesData on COVID-19 cases from NY Times at US cou...date, county, state, fips, cases, deaths
19covid-19nytimes_statesData on COVID-19 cases from NY Times at US sta...date, state, fips, cases, deaths
20covid-19prediction_models_county_predictionsCounty-level Predictions Data. Sourced from Yu...countyfips, countyname, statename, severity_co...
21covid-19prediction_models_severity_indexSeverity Index models. Sourced from Yu Group a...severity_1-day, severity_2-day, severity_3-day...
22covid-19tableau_covid_datahubCOVID-19 data that has been gathered and unifi...country_short_name, country_alpha_3_code, coun...
23covid-19tableau_jhuJohns Hopkins University data on COVID-19 case...case_type, cases, difference, date, country_re...
24covid-19us_state_abbreviationsLookup table for US state abbreviationsstate, abbreviation
25covid-19world_cases_deaths_testingData on confirmed cases, deaths, and testing. ...iso_code, location, date, total_cases, new_cas...
\n", + "
" + ], + "text/plain": [ + " Database Table \\\n", + "0 covid-19 alleninstitute_comprehend_medical \n", + "1 covid-19 alleninstitute_metadata \n", + "2 covid-19 country_codes \n", + "3 covid-19 county_populations \n", + "4 covid-19 covid_knowledge_graph_edges \n", + "5 covid-19 covid_knowledge_graph_nodes_author \n", + "6 covid-19 covid_knowledge_graph_nodes_concept \n", + "7 covid-19 covid_knowledge_graph_nodes_institution \n", + "8 covid-19 covid_knowledge_graph_nodes_paper \n", + "9 covid-19 covid_knowledge_graph_nodes_topic \n", + "10 covid-19 covid_testing_states_daily \n", + "11 covid-19 covid_testing_us_daily \n", + "12 covid-19 covid_testing_us_total \n", + "13 covid-19 covidcast_data \n", + "14 covid-19 covidcast_metadata \n", + "15 covid-19 enigma_jhu \n", + "16 covid-19 enigma_jhu_timeseries \n", + "17 covid-19 hospital_beds \n", + "18 covid-19 nytimes_counties \n", + "19 covid-19 nytimes_states \n", + "20 covid-19 prediction_models_county_predictions \n", + "21 covid-19 prediction_models_severity_index \n", + "22 covid-19 tableau_covid_datahub \n", + "23 covid-19 tableau_jhu \n", + "24 covid-19 us_state_abbreviations \n", + "25 covid-19 world_cases_deaths_testing \n", + "\n", + " Description \\\n", + "0 Comprehend Medical results run against Allen I... \n", + "1 Metadata on papers pulled from the Allen Insti... \n", + "2 Lookup table for country codes \n", + "3 Lookup table for population for each county ba... \n", + "4 AWS Knowledge Graph for COVID-19 data \n", + "5 AWS Knowledge Graph for COVID-19 data \n", + "6 AWS Knowledge Graph for COVID-19 data \n", + "7 AWS Knowledge Graph for COVID-19 data \n", + "8 AWS Knowledge Graph for COVID-19 data \n", + "9 AWS Knowledge Graph for COVID-19 data \n", + "10 USA total test daily trend by state. Sourced ... \n", + "11 USA total test daily trend. Sourced from covi... \n", + "12 USA total tests. Sourced from covidtracking.c... 
\n", + "13 CMU Delphi's COVID-19 Surveillance Data \n", + "14 CMU Delphi's COVID-19 Surveillance Metadata \n", + "15 Johns Hopkins University Consolidated data on ... \n", + "16 Johns Hopkins University data on COVID-19 case... \n", + "17 Data on hospital beds and their utilization in... \n", + "18 Data on COVID-19 cases from NY Times at US cou... \n", + "19 Data on COVID-19 cases from NY Times at US sta... \n", + "20 County-level Predictions Data. Sourced from Yu... \n", + "21 Severity Index models. Sourced from Yu Group a... \n", + "22 COVID-19 data that has been gathered and unifi... \n", + "23 Johns Hopkins University data on COVID-19 case... \n", + "24 Lookup table for US state abbreviations \n", + "25 Data on confirmed cases, deaths, and testing. ... \n", + "\n", + " Columns Partitions \n", + "0 paper_id, date, dx_name, test_name, procedure_... \n", + "1 cord_uid, sha, source_x, title, doi, pmcid, pu... \n", + "2 country, alpha-2 code, alpha-3 code, numeric c... \n", + "3 id, id2, county, state, population estimate 2018 \n", + "4 id, label, from, to, score \n", + "5 id, label, first, last, full_name \n", + "6 id, label, entity, concept \n", + "7 id, label, institution, country, settlement \n", + "8 id, label, doi, sha_code, publish_time, source... \n", + "9 id, label, topic, topic_num \n", + "10 date, state, positive, negative, pending, hosp... \n", + "11 date, states, positive, negative, posneg, pend... \n", + "12 positive, negative, posneg, hospitalized, deat... \n", + "13 data_source, signal, geo_type, time_value, geo... \n", + "14 data_source, signal, time_type, geo_type, min_... \n", + "15 fips, admin2, province_state, country_region, ... \n", + "16 uid, fips, iso2, iso3, code3, admin2, latitude... \n", + "17 objectid, hospital_name, hospital_type, hq_add... \n", + "18 date, county, state, fips, cases, deaths \n", + "19 date, state, fips, cases, deaths \n", + "20 countyfips, countyname, statename, severity_co... 
\n", + "21 severity_1-day, severity_2-day, severity_3-day... \n", + "22 country_short_name, country_alpha_3_code, coun... \n", + "23 case_type, cases, difference, date, country_re... \n", + "24 state, abbreviation \n", + "25 iso_code, location, date, total_cases, new_cas... " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "wr.catalog.tables(database=\"covid-19\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create data source of QuickSight\n", + "Note: data source stores the connection information." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "wr.quicksight.create_athena_data_source(\n", + " name=\"covid-19\",\n", + " workgroup=\"primary\",\n", + " allowed_to_manage=[\"dev\"]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "pycharm": { + "is_executing": false + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
DatabaseTableDescriptionColumnsPartitions
0covid-19nytimes_countiesData on COVID-19 cases from NY Times at US cou...date, county, state, fips, cases, deaths
1covid-19nytimes_statesData on COVID-19 cases from NY Times at US sta...date, state, fips, cases, deaths
\n", + "
" + ], + "text/plain": [ + " Database Table \\\n", + "0 covid-19 nytimes_counties \n", + "1 covid-19 nytimes_states \n", + "\n", + " Description \\\n", + "0 Data on COVID-19 cases from NY Times at US cou... \n", + "1 Data on COVID-19 cases from NY Times at US sta... \n", + "\n", + " Columns Partitions \n", + "0 date, county, state, fips, cases, deaths \n", + "1 date, state, fips, cases, deaths " + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "wr.catalog.tables(database=\"covid-19\", name_contains=\"nyt\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "pycharm": { + "is_executing": false + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
datecountystatefipscasesdeaths
02020-01-21SnohomishWashington5306110
12020-01-22SnohomishWashington5306110
22020-01-23SnohomishWashington5306110
32020-01-24CookIllinois1703110
42020-01-24SnohomishWashington5306110
52020-01-25OrangeCalifornia0605910
62020-01-25CookIllinois1703110
72020-01-25SnohomishWashington5306110
82020-01-26MaricopaArizona0401310
92020-01-26Los AngelesCalifornia0603710
\n", + "
" + ], + "text/plain": [ + " date county state fips cases deaths\n", + "0 2020-01-21 Snohomish Washington 53061 1 0\n", + "1 2020-01-22 Snohomish Washington 53061 1 0\n", + "2 2020-01-23 Snohomish Washington 53061 1 0\n", + "3 2020-01-24 Cook Illinois 17031 1 0\n", + "4 2020-01-24 Snohomish Washington 53061 1 0\n", + "5 2020-01-25 Orange California 06059 1 0\n", + "6 2020-01-25 Cook Illinois 17031 1 0\n", + "7 2020-01-25 Snohomish Washington 53061 1 0\n", + "8 2020-01-26 Maricopa Arizona 04013 1 0\n", + "9 2020-01-26 Los Angeles California 06037 1 0" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "wr.athena.read_sql_query(\"SELECT * FROM nytimes_counties limit 10\", database=\"covid-19\", ctas_approach=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "pycharm": { + "is_executing": false + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + "
datecountystatefipsconfirmeddeathspopulationcounty2Hospitalhospital_fipslicensed_bedsstaffed_bedsicu_bedsbed_utilizationpotential_increase_bed_capacity
02020-04-12ParkMontana300677016736Park030067252540.4325480
12020-04-12RavalliMontana300813043172Ravalli030081252550.5677810
22020-04-12Silver BowMontana3009311034993Silver Bow0300939871110.55145727
32020-04-12ClayNebraska31035206214Clay<NA><NA><NA><NA><NA>NaN<NA>
42020-04-12CumingNebraska31039208940Cuming031039252540.2044930
................................................
2276842020-06-11HockleyTexas4821928122980Hockley048219484880.1206050
2276852020-06-11HudspethTexas482291104795Hudspeth<NA><NA><NA><NA><NA>NaN<NA>
2276862020-06-11JonesTexas48253633019817Jones04825345710.71859138
2276872020-06-11La SalleTexas48283407531La Salle<NA><NA><NA><NA><NA>NaN<NA>
2276882020-06-11LimestoneTexas4829336123519Limestone048293786990.1639409
\n", + "

227689 rows × 15 columns

\n", + "
" + ], + "text/plain": [ + " date county state fips confirmed deaths population \\\n", + "0 2020-04-12 Park Montana 30067 7 0 16736 \n", + "1 2020-04-12 Ravalli Montana 30081 3 0 43172 \n", + "2 2020-04-12 Silver Bow Montana 30093 11 0 34993 \n", + "3 2020-04-12 Clay Nebraska 31035 2 0 6214 \n", + "4 2020-04-12 Cuming Nebraska 31039 2 0 8940 \n", + "... ... ... ... ... ... ... ... \n", + "227684 2020-06-11 Hockley Texas 48219 28 1 22980 \n", + "227685 2020-06-11 Hudspeth Texas 48229 11 0 4795 \n", + "227686 2020-06-11 Jones Texas 48253 633 0 19817 \n", + "227687 2020-06-11 La Salle Texas 48283 4 0 7531 \n", + "227688 2020-06-11 Limestone Texas 48293 36 1 23519 \n", + "\n", + " county2 Hospital hospital_fips licensed_beds staffed_beds \\\n", + "0 Park 0 30067 25 25 \n", + "1 Ravalli 0 30081 25 25 \n", + "2 Silver Bow 0 30093 98 71 \n", + "3 Clay \n", + "4 Cuming 0 31039 25 25 \n", + "... ... ... ... ... ... \n", + "227684 Hockley 0 48219 48 48 \n", + "227685 Hudspeth \n", + "227686 Jones 0 48253 45 7 \n", + "227687 La Salle \n", + "227688 Limestone 0 48293 78 69 \n", + "\n", + " icu_beds bed_utilization potential_increase_bed_capacity \n", + "0 4 0.432548 0 \n", + "1 5 0.567781 0 \n", + "2 11 0.551457 27 \n", + "3 NaN \n", + "4 4 0.204493 0 \n", + "... ... ... ... 
\n", + "227684 8 0.120605 0 \n", + "227685 NaN \n", + "227686 1 0.718591 38 \n", + "227687 NaN \n", + "227688 9 0.163940 9 \n", + "\n", + "[227689 rows x 15 columns]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sql = \"\"\"\n", + "SELECT \n", + " j.*, \n", + " co.Population, \n", + " co.county AS county2, \n", + " hb.* \n", + "FROM \n", + " (\n", + " SELECT \n", + " date, \n", + " county, \n", + " state, \n", + " fips, \n", + " cases as confirmed, \n", + " deaths \n", + " FROM \"covid-19\".nytimes_counties\n", + " ) j \n", + " LEFT OUTER JOIN (\n", + " SELECT \n", + " DISTINCT county, \n", + " state, \n", + " \"population estimate 2018\" AS Population \n", + " FROM \n", + " \"covid-19\".county_populations \n", + " WHERE \n", + " state IN (\n", + " SELECT \n", + " DISTINCT state \n", + " FROM \n", + " \"covid-19\".nytimes_counties\n", + " ) \n", + " AND county IN (\n", + " SELECT \n", + " DISTINCT county as county \n", + " FROM \"covid-19\".nytimes_counties\n", + " )\n", + " ) co ON co.county = j.county \n", + " AND co.state = j.state \n", + " LEFT OUTER JOIN (\n", + " SELECT \n", + " count(objectid) as Hospital, \n", + " fips as hospital_fips, \n", + " sum(num_licensed_beds) as licensed_beds, \n", + " sum(num_staffed_beds) as staffed_beds, \n", + " sum(num_icu_beds) as icu_beds, \n", + " avg(bed_utilization) as bed_utilization, \n", + " sum(\n", + " potential_increase_in_bed_capac\n", + " ) as potential_increase_bed_capacity \n", + " FROM \"covid-19\".hospital_beds \n", + " WHERE \n", + " fips in (\n", + " SELECT \n", + " DISTINCT fips \n", + " FROM \n", + " \"covid-19\".nytimes_counties\n", + " ) \n", + " GROUP BY \n", + " 2\n", + " ) hb ON hb.hospital_fips = j.fips\n", + "\"\"\"\n", + "\n", + "wr.athena.read_sql_query(sql, database=\"covid-19\", ctas_approach=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create Dataset with custom SQL option" + ] + }, + { + 
"cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "wr.quicksight.create_athena_dataset(\n", + " name=\"covid19-nytimes-usa\",\n", + " sql=sql,\n", + " sql_name='CustomSQL',\n", + " data_source_name=\"covid-19\",\n", + " import_mode='SPICE',\n", + " allowed_to_manage=[\"dev\"]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "ingestion_id = wr.quicksight.create_ingestion(\"covid19-nytimes-usa\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Wait ingestion" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "while wr.quicksight.describe_ingestion(ingestion_id=ingestion_id, dataset_name=\"covid19-nytimes-usa\")[\"IngestionStatus\"] not in [\"COMPLETED\", \"FAILED\"]:\n", + " sleep(1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Describe last ingestion" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'RowsIngested': 227689, 'RowsDropped': 0}" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "wr.quicksight.describe_ingestion(ingestion_id=ingestion_id, dataset_name=\"covid19-nytimes-usa\")[\"RowInfo\"]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "List all ingestions" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'time': datetime.datetime(2020, 6, 12, 15, 13, 46, 996000, tzinfo=tzlocal()),\n", + " 'source': 'MANUAL'},\n", + " {'time': datetime.datetime(2020, 6, 12, 15, 13, 42, 344000, tzinfo=tzlocal()),\n", + " 'source': 'MANUAL'}]" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "[{\"time\": user[\"CreatedTime\"], 
\"source\": user[\"RequestSource\"]} for user in wr.quicksight.list_ingestions(\"covid19-nytimes-usa\")]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create new dataset from a table directly" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "wr.quicksight.create_athena_dataset(\n", + " name=\"covid-19-tableau_jhu\",\n", + " table=\"tableau_jhu\",\n", + " data_source_name=\"covid-19\",\n", + " database=\"covid-19\",\n", + " import_mode='DIRECT_QUERY',\n", + " rename_columns={\n", + " \"cases\": \"Count_of_Cases\", \n", + " \"combined_key\": \"County\"\n", + " },\n", + " cast_columns_types={\n", + " \"Count_of_Cases\": \"INTEGER\"\n", + " },\n", + " allowed_to_manage=[\"dev\"]\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Cleaning up" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "wr.quicksight.delete_data_source(\"covid-19\")\n", + "wr.quicksight.delete_dataset(\"covid19-nytimes-usa\")\n", + "wr.quicksight.delete_dataset(\"covid-19-tableau_jhu\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "awswrangler", + "language": "python", + "name": "awswrangler" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.10" + }, + "pycharm": { + "stem_cell": { + "cell_type": "raw", + "metadata": { + "collapsed": false + }, + "source": [] + } + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}