diff --git a/awswrangler/athena.py b/awswrangler/athena.py
index 35c5e6fd5..6759ed209 100644
--- a/awswrangler/athena.py
+++ b/awswrangler/athena.py
@@ -563,6 +563,7 @@ def read_sql_table(
     table: str,
     database: str,
     ctas_approach: bool = True,
+    categories: Optional[List[str]] = None,
     chunksize: Optional[int] = None,
     s3_output: Optional[str] = None,
     workgroup: Optional[str] = None,
@@ -614,6 +615,9 @@
     ctas_approach: bool
         Wraps the query using a CTAS, and read the resulted parquet data on S3.
         If false, read the regular CSV on S3.
+    categories: List[str], optional
+        List of column names that should be returned as pandas.Categorical.
+        Recommended for memory-restricted environments.
     chunksize: int, optional
         If specified, return an generator where chunksize is the number of rows to include in each chunk.
     s3_output : str, optional
@@ -645,6 +649,7 @@
         sql=f'SELECT * FROM "{table}"',
         database=database,
         ctas_approach=ctas_approach,
+        categories=categories,
         chunksize=chunksize,
         s3_output=s3_output,
         workgroup=workgroup,
diff --git a/testing/test_awswrangler/test_data_lake.py b/testing/test_awswrangler/test_data_lake.py
index 2b2ae8f8a..7bcbd24ec 100644
--- a/testing/test_awswrangler/test_data_lake.py
+++ b/testing/test_awswrangler/test_data_lake.py
@@ -646,6 +646,8 @@ def test_category(bucket, database):
     ensure_data_types_category(df2)
     df2 = wr.athena.read_sql_query("SELECT * FROM test_category", database=database, categories=list(df.columns))
     ensure_data_types_category(df2)
+    df2 = wr.athena.read_sql_table(table="test_category", database=database, categories=list(df.columns))
+    ensure_data_types_category(df2)
     df2 = wr.athena.read_sql_query(
         "SELECT * FROM test_category", database=database, categories=list(df.columns), ctas_approach=False
     )
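
For reviewers, a minimal usage sketch of the new parameter; the table and database names here are illustrative placeholders, not part of this change:

```python
import awswrangler as wr

# Read a whole Glue/Athena table, decoding the listed columns as
# pandas.Categorical to reduce memory usage. "my_table" and "my_db"
# are hypothetical names used only for illustration.
df = wr.athena.read_sql_table(
    table="my_table",
    database="my_db",
    categories=["col_string", "col_partition"],
)

# Columns listed in `categories` come back with dtype "category".
print(df.dtypes)
```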