diff --git a/README.md b/README.md index 5a0318e04..d0601bb19 100644 --- a/README.md +++ b/README.md @@ -74,8 +74,10 @@ df = wr.athena.read_sql_query("SELECT * FROM my_table", database="my_db") - [06 - Amazon Athena](https://github.com/awslabs/aws-data-wrangler/blob/master/tutorials/06%20-%20Amazon%20Athena.ipynb) - [07 - Databases (Redshift, MySQL and PostgreSQL)](https://github.com/awslabs/aws-data-wrangler/blob/master/tutorials/07%20-%20Redshift%2C%20MySQL%2C%20PostgreSQL.ipynb) - [08 - Redshift - Copy & Unload.ipynb](https://github.com/awslabs/aws-data-wrangler/blob/master/tutorials/08%20-%20Redshift%20-%20Copy%20%26%20Unload.ipynb) - - [09 - Redshift - Append, Overwrite and Upsert.ipynb](https://github.com/awslabs/aws-data-wrangler/blob/master/tutorials/09%20-%20Redshift%20-%20Append%2C%20Overwrite%2C%20Upsert.ipynb) - - [10 - Parquet Crawler.ipynb](https://github.com/awslabs/aws-data-wrangler/blob/master/tutorials/10%20-%20Parquet%20Crawler.ipynb) + - [09 - Redshift - Append, Overwrite and Upsert](https://github.com/awslabs/aws-data-wrangler/blob/master/tutorials/09%20-%20Redshift%20-%20Append%2C%20Overwrite%2C%20Upsert.ipynb) + - [10 - Parquet Crawler](https://github.com/awslabs/aws-data-wrangler/blob/master/tutorials/10%20-%20Parquet%20Crawler.ipynb) + - [11 - CSV Datasets](https://github.com/awslabs/aws-data-wrangler/blob/master/tutorials/11%20-%20CSV%20Datasets.ipynb) + - [12 - CSV Crawler](https://github.com/awslabs/aws-data-wrangler/blob/master/tutorials/12%20-%20CSV%20Crawler.ipynb) - [**API Reference**](https://aws-data-wrangler.readthedocs.io/en/latest/api.html) - [Amazon S3](https://aws-data-wrangler.readthedocs.io/en/latest/api.html#amazon-s3) - [AWS Glue Catalog](https://aws-data-wrangler.readthedocs.io/en/latest/api.html#aws-glue-catalog) diff --git a/tutorials/11 - CSV Datasets.ipynb b/tutorials/11 - CSV Datasets.ipynb new file mode 100644 index 000000000..23fd68cf8 --- /dev/null +++ b/tutorials/11 - CSV Datasets.ipynb @@ -0,0 +1,528 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[![AWS Data Wrangler](_static/logo.png \"AWS Data Wrangler\")](https://github.com/awslabs/aws-data-wrangler)\n", + "\n", + "# 11 - CSV Datasets\n", + "\n", + "Wrangler has three different write modes for storing CSV datasets on Amazon S3.\n", + "\n", + "- **append** (Default)\n", + "\n", + "    Only adds new files, never deleting existing ones.\n", + "    \n", + "- **overwrite**\n", + "\n", + "    Deletes everything in the target directory and then adds the new files.\n", + "    \n", + "- **overwrite_partitions** (Partition Upsert)\n", + "\n", + "    Only deletes the paths of the partitions that should be updated and then writes the new partition files. It's like a \"partition upsert\"."
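, + "\n", + "As a minimal sketch (the bucket name below is a placeholder and the DataFrame is illustrative; the cells below demonstrate each mode for real), the write mode is picked through the `mode` argument of `wr.s3.to_csv`:\n", + "\n", + "```python\n", + "import pandas as pd\n", + "import awswrangler as wr\n", + "\n", + "df = pd.DataFrame({\"id\": [1, 2], \"value\": [\"foo\", \"boo\"]})\n", + "\n", + "# append (default): keep every existing file and just add the new ones\n", + "wr.s3.to_csv(df=df, path=\"s3://my-bucket/dataset/\", index=False, dataset=True, mode=\"append\")\n", + "\n", + "# overwrite: delete everything under the path first, then write the new files\n", + "wr.s3.to_csv(df=df, path=\"s3://my-bucket/dataset/\", index=False, dataset=True, mode=\"overwrite\")\n", + "\n", + "# overwrite_partitions: delete only the partition paths present in df, then write them again\n", + "wr.s3.to_csv(df=df, path=\"s3://my-bucket/dataset/\", index=False, dataset=True,\n", + "             mode=\"overwrite_partitions\", partition_cols=[\"value\"])\n", + "```"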
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date\n", + "import awswrangler as wr\n", + "import pandas as pd" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Enter your bucket name:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdin", + "output_type": "stream", + "text": [ + " ··········································\n" + ] + } + ], + "source": [ + "import getpass\n", + "bucket = getpass.getpass()\n", + "path = f\"s3://{bucket}/dataset/\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Creating the Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idvaluedate
01foo2020-01-01
12boo2020-01-02
\n", + "
" + ], + "text/plain": [ + " id value date\n", + "0 1 foo 2020-01-01\n", + "1 2 boo 2020-01-02" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.DataFrame({\n", + " \"id\": [1, 2],\n", + " \"value\": [\"foo\", \"boo\"],\n", + " \"date\": [date(2020, 1, 1), date(2020, 1, 2)]\n", + "})\n", + "\n", + "wr.s3.to_csv(\n", + " df=df,\n", + " path=path,\n", + " index=False,\n", + " dataset=True,\n", + " mode=\"overwrite\",\n", + " database=\"awswrangler_test\",\n", + " table=\"csv_dataset\"\n", + ")\n", + "\n", + "wr.athena.read_sql_table(database=\"awswrangler_test\", table=\"csv_dataset\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Appending" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idvaluedate
01foo2020-01-01
12boo2020-01-02
23bar2020-01-03
\n", + "
" + ], + "text/plain": [ + " id value date\n", + "0 1 foo 2020-01-01\n", + "1 2 boo 2020-01-02\n", + "2 3 bar 2020-01-03" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.DataFrame({\n", + " \"id\": [3],\n", + " \"value\": [\"bar\"],\n", + " \"date\": [date(2020, 1, 3)]\n", + "})\n", + "\n", + "wr.s3.to_csv(\n", + " df=df,\n", + " path=path,\n", + " index=False,\n", + " dataset=True,\n", + " mode=\"append\",\n", + " database=\"awswrangler_test\",\n", + " table=\"csv_dataset\"\n", + ")\n", + "\n", + "wr.athena.read_sql_table(database=\"awswrangler_test\", table=\"csv_dataset\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Overwriting" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idvaluedate
03bar2020-01-03
\n", + "
" + ], + "text/plain": [ + " id value date\n", + "0 3 bar 2020-01-03" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "wr.s3.to_csv(\n", + " df=df,\n", + " path=path,\n", + " index=False,\n", + " dataset=True,\n", + " mode=\"overwrite\",\n", + " database=\"awswrangler_test\",\n", + " table=\"csv_dataset\"\n", + ")\n", + "\n", + "wr.athena.read_sql_table(database=\"awswrangler_test\", table=\"csv_dataset\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Creating a **Partitoned** Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idvaluedate
02boo2020-01-02
11foo2020-01-01
\n", + "
" + ], + "text/plain": [ + " id value date\n", + "0 2 boo 2020-01-02\n", + "1 1 foo 2020-01-01" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.DataFrame({\n", + " \"id\": [1, 2],\n", + " \"value\": [\"foo\", \"boo\"],\n", + " \"date\": [date(2020, 1, 1), date(2020, 1, 2)]\n", + "})\n", + "\n", + "wr.s3.to_csv(\n", + " df=df,\n", + " path=path,\n", + " index=False,\n", + " dataset=True,\n", + " mode=\"overwrite\",\n", + " database=\"awswrangler_test\",\n", + " table=\"csv_dataset\",\n", + " partition_cols=[\"date\"]\n", + ")\n", + "\n", + "wr.athena.read_sql_table(database=\"awswrangler_test\", table=\"csv_dataset\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Upserting partitions (overwrite_partitions)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idvaluedate
02xoo2020-01-02
13bar2020-01-03
21foo2020-01-01
\n", + "
" + ], + "text/plain": [ + " id value date\n", + "0 2 xoo 2020-01-02\n", + "1 3 bar 2020-01-03\n", + "2 1 foo 2020-01-01" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\n", + "df = pd.DataFrame({\n", + " \"id\": [2, 3],\n", + " \"value\": [\"xoo\", \"bar\"],\n", + " \"date\": [date(2020, 1, 2), date(2020, 1, 3)]\n", + "})\n", + "\n", + "wr.s3.to_csv(\n", + " df=df,\n", + " path=path,\n", + " index=False,\n", + " dataset=True,\n", + " mode=\"overwrite_partitions\",\n", + " database=\"awswrangler_test\",\n", + " table=\"csv_dataset\",\n", + " partition_cols=[\"date\"]\n", + ")\n", + "\n", + "wr.athena.read_sql_table(database=\"awswrangler_test\", table=\"csv_dataset\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "conda_python3", + "language": "python", + "name": "conda_python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + }, + "pycharm": { + "stem_cell": { + "cell_type": "raw", + "metadata": { + "collapsed": false + }, + "source": [] + } + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/tutorials/12 - CSV Crawler.ipynb b/tutorials/12 - CSV Crawler.ipynb new file mode 100644 index 000000000..d3e4bd710 --- /dev/null +++ b/tutorials/12 - CSV Crawler.ipynb @@ -0,0 +1,707 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[![AWS Data Wrangler](_static/logo.png \"AWS Data Wrangler\")](https://github.com/awslabs/aws-data-wrangler)\n", + "\n", + "# 12 - CSV Crawler\n", + "\n", + "[Wrangler](https://github.com/awslabs/aws-data-wrangler) can extract only the metadata from a Pandas DataFrame and then add it can be added to Glue Catalog as a table." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import awswrangler as wr\n", + "from datetime import datetime\n", + "import pandas as pd" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Enter your bucket name:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdin", + "output_type": "stream", + "text": [ + " ··········································\n" + ] + } + ], + "source": [ + "import getpass\n", + "bucket = getpass.getpass()\n", + "path = f\"s3://{bucket}/csv_crawler/\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Creating a Pandas DataFrame" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idstringfloatdatetimestampboolpar0par1
01foo1.02020-01-012020-01-01 00:00:00True1a
12NoneNaNNoneNaTNone1b
23boo2.02020-01-022020-01-02 00:00:01False2b
\n", + "
" + ], + "text/plain": [ + " id string float date timestamp bool par0 par1\n", + "0 1 foo 1.0 2020-01-01 2020-01-01 00:00:00 True 1 a\n", + "1 2 None NaN None NaT None 1 b\n", + "2 3 boo 2.0 2020-01-02 2020-01-02 00:00:01 False 2 b" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ts = lambda x: datetime.strptime(x, \"%Y-%m-%d %H:%M:%S.%f\") # noqa\n", + "dt = lambda x: datetime.strptime(x, \"%Y-%m-%d\").date() # noqa\n", + "\n", + "df = pd.DataFrame(\n", + " {\n", + " \"id\": [1, 2, 3],\n", + " \"string\": [\"foo\", None, \"boo\"],\n", + " \"float\": [1.0, None, 2.0],\n", + " \"date\": [dt(\"2020-01-01\"), None, dt(\"2020-01-02\")],\n", + " \"timestamp\": [ts(\"2020-01-01 00:00:00.0\"), None, ts(\"2020-01-02 00:00:01.0\")],\n", + " \"bool\": [True, None, False],\n", + " \"par0\": [1, 1, 2],\n", + " \"par1\": [\"a\", \"b\", \"b\"],\n", + " }\n", + ")\n", + "\n", + "df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Extracting the metadata" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "columns_types, partitions_types = wr.catalog.extract_athena_types(\n", + " df=df,\n", + " file_format=\"csv\",\n", + " index=False,\n", + " partition_cols=[\"par0\", \"par1\"]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'id': 'bigint',\n", + " 'string': 'string',\n", + " 'float': 'double',\n", + " 'date': 'date',\n", + " 'timestamp': 'timestamp',\n", + " 'bool': 'boolean'}" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "columns_types" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'par0': 'bigint', 'par1': 'string'}" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "partitions_types" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Creating the table" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "wr.catalog.create_csv_table(\n", + " table=\"csv_crawler\",\n", + " database=\"awswrangler_test\",\n", + " path=path,\n", + " partitions_types=partitions_types,\n", + " columns_types=columns_types,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Checking" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Column NameTypePartitionComment
0idbigintFalse
1stringstringFalse
2floatdoubleFalse
3datedateFalse
4timestamptimestampFalse
5boolbooleanFalse
6par0bigintTrue
7par1stringTrue
\n", + "
" + ], + "text/plain": [ + " Column Name Type Partition Comment\n", + "0 id bigint False \n", + "1 string string False \n", + "2 float double False \n", + "3 date date False \n", + "4 timestamp timestamp False \n", + "5 bool boolean False \n", + "6 par0 bigint True \n", + "7 par1 string True " + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "wr.catalog.table(database=\"awswrangler_test\", table=\"csv_crawler\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## We can still using the extracted metadata to ensure all data types consistence to new data" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idstringfloatdatetimestampboolpar0par1
01112020-01-012020-01-0211a
\n", + "
" + ], + "text/plain": [ + " id string float date timestamp bool par0 par1\n", + "0 1 1 1 2020-01-01 2020-01-02 1 1 a" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.DataFrame(\n", + " {\n", + " \"id\": [1],\n", + " \"string\": [\"1\"],\n", + " \"float\": [1],\n", + " \"date\": [ts(\"2020-01-01 00:00:00.0\")],\n", + " \"timestamp\": [dt(\"2020-01-02\")],\n", + " \"bool\": [1],\n", + " \"par0\": [1],\n", + " \"par1\": [\"a\"],\n", + " }\n", + ")\n", + "\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "res = wr.s3.to_csv(\n", + " df=df,\n", + " path=path,\n", + " index=False,\n", + " dataset=True,\n", + " database=\"awswrangler_test\",\n", + " table=\"csv_crawler\",\n", + " partition_cols=[\"par0\", \"par1\"],\n", + " dtype=columns_types\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## You can also extract the metadata directly from the Catalog with you want" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "dtype = wr.catalog.get_table_types(database=\"awswrangler_test\", table=\"csv_crawler\")" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "res = wr.s3.to_csv(\n", + " df=df,\n", + " path=path,\n", + " index=False,\n", + " dataset=True,\n", + " database=\"awswrangler_test\",\n", + " table=\"csv_crawler\",\n", + " partition_cols=[\"par0\", \"par1\"],\n", + " dtype=dtype\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Checking out" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idstringfloatdatetimestampboolpar0par1
0111.02020-01-012020-01-02True1a
1111.02020-01-012020-01-02True1a
\n", + "
" + ], + "text/plain": [ + " id string float date timestamp bool par0 par1\n", + "0 1 1 1.0 2020-01-01 2020-01-02 True 1 a\n", + "1 1 1 1.0 2020-01-01 2020-01-02 True 1 a" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = wr.athena.read_sql_table(database=\"awswrangler_test\", table=\"csv_crawler\")\n", + "\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "id Int64\n", + "string string\n", + "float float64\n", + "date object\n", + "timestamp datetime64[ns]\n", + "bool boolean\n", + "par0 Int64\n", + "par1 string\n", + "dtype: object" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.dtypes" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Cleaning Up S3" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "wr.s3.delete_objects(path)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Cleaning Up the Database" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "wr.catalog.delete_table_if_exists(database=\"awswrangler_test\", table=\"csv_crawler\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "conda_python3", + "language": "python", + "name": "conda_python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + }, + "pycharm": { + "stem_cell": { + "cell_type": "raw", + "metadata": { + "collapsed": false + }, + "source": [] + } + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}