# Delete Data and Resources

In [None]:
%load_ext lab_black
%load_ext autoreload
%autoreload 2

In [None]:
import os
import shutil
from glob import glob
from typing import Union

import boto3
import pandas as pd
import snowflake.connector
from dotenv import find_dotenv, load_dotenv

## About

Delete all created resources and remove the local data files.

## User Inputs

In [None]:
# Snowflake database used for the station statistics connection; dropped below
stations_db_name = "torbikestations"

# Tables dropped by this notebook (trips database and station statistics database)
trips_table_name = "trips"
station_stats_table_name = "station_stats"

# Stage and file format dropped from the trips database
trips_stage_name = "bikes_stage"
trips_file_format_name = "COMMASEP_ONEHEADROW"

# When "no", settings are loaded from a .env file; any other value skips that step
# (presumably because a CI run already has the variables exported - confirm)
ci_run = "no"

In [None]:
# Pull settings from a .env file unless this is a CI run
if ci_run == "no":
    load_dotenv(find_dotenv())


trips_db_name = os.getenv("DB_NAME")

# Settings shared by every Snowflake connection made in this notebook
_snowflake_base = dict(
    user=os.getenv("SNOWFLAKE_USER"),
    password=os.getenv("SNOWFLAKE_PASS"),
    account=os.getenv("SNOWFLAKE_ACCOUNT"),
    warehouse=os.getenv("SNOWFLAKE_WAREHOUSE"),
)
_snowflake_schema = os.getenv("SNOWFLAKE_DB_SCHEMA")
_snowflake_role = "sysadmin"

# No database selected: used when dropping the databases themselves
snowflake_dict_no_db = {**_snowflake_base, "role": _snowflake_role}

# Trips database
snowflake_dict = {
    **_snowflake_base,
    "database": trips_db_name,
    "schema": _snowflake_schema,
    "role": _snowflake_role,
}

# Station statistics database
snowflake_station_stats_dict = {
    **_snowflake_base,
    "database": stations_db_name,
    "schema": _snowflake_schema,
    "role": _snowflake_role,
}

In [None]:
# AWS region for the boto3 clients, read from the AWS_REGION environment variable
aws_region = os.getenv("AWS_REGION")
# Account ID of the credentials in use, looked up through STS
account_id = (
    boto3.client("sts", region_name=aws_region).get_caller_identity().get("Account")
)

In [None]:
def show_sql_df(
    query: str,
    cursor,
    table_output: bool = False,
) -> Union[None, pd.DataFrame]:
    """Run a SQL statement and optionally display its result set.

    Parameters
    ----------
    query : str
        SQL text passed to ``cursor.execute``.
    cursor
        Database cursor providing ``execute``, ``description`` and ``fetchall``.
    table_output : bool
        When True, fetch all rows and, if any came back, display and return
        them as a DataFrame whose column names are lower-cased.

    Returns
    -------
    pd.DataFrame or None
        The fetched rows, or None when ``table_output`` is False or the
        statement produced no rows.
    """
    cursor.execute(query)
    if not table_output:
        return None

    # Column names come from the cursor metadata and are normalised to lower case
    column_names = [column[0].lower() for column in cursor.description]
    rows = cursor.fetchall()
    if not rows:
        return None

    result = pd.DataFrame.from_records(rows, columns=column_names)
    display(result)
    return result

In [None]:
# Open a Snowflake connection to the trips database and get a cursor for it
conn = snowflake.connector.connect(**snowflake_dict)
cur = conn.cursor()

## Delete AWS QuickSight Data Source

### Create AWS Python SDK Objects for Managing QuickSight Resources

In [None]:
# NOTE(review): qs_client_user is pinned to us-east-1 and is not used anywhere in
# the visible notebook - confirm whether it is still needed
qs_client_user = boto3.client("quicksight", region_name="us-east-1")
# QuickSight client in the configured region; used below to list and delete data sources
qs_client = boto3.client("quicksight", region_name=aws_region)

### Delete Data Source

In [None]:
# Collect every QuickSight data source whose name matches the trips database.
# list_data_sources returns one page of results per call, so keep following
# NextToken until it is absent; otherwise a matching data source that is not
# on the first page would never be found (and therefore never deleted).
ds_list = []
list_kwargs = dict(AwsAccountId=account_id)
while True:
    ds_page = qs_client.list_data_sources(**list_kwargs)
    ds_list.extend(
        ds for ds in ds_page.get("DataSources", []) if ds["Name"] == trips_db_name
    )
    next_token = ds_page.get("NextToken")
    if not next_token:
        break
    list_kwargs["NextToken"] = next_token
ds_list

In [None]:
# Delete each matching data source, addressing it by its DataSourceId
for ds in ds_list:
    qs_client.delete_data_source(
        AwsAccountId=account_id, DataSourceId=ds["DataSourceId"]
    )

## Delete Snowflake Resources

### Trips Database Internal Data Stage

In [None]:
# Drop the stage from the trips database; IF EXISTS keeps this cell re-runnable
query = f"""
        DROP STAGE IF EXISTS {trips_stage_name}
        """
# Assign to _ so the notebook does not echo the call's return value
_ = cur.execute(query)

In [None]:
%%time
# List any stage still carrying this name; show_sql_df displays nothing when
# the statement returns no rows
query = f"""
        SHOW STAGES LIKE '{trips_stage_name}'
        """
_ = show_sql_df(query, cur, True)

### Trips Database CSV File Format

In [None]:
# Drop the file format from the trips database; IF EXISTS keeps this cell re-runnable
query = f"""
        DROP FILE FORMAT IF EXISTS {trips_file_format_name}
        """
# Assign to _ so the notebook does not echo the call's return value
_ = cur.execute(query)

In [None]:
%%time
# List any file format still carrying this name; show_sql_df displays nothing
# when the statement returns no rows
query = f"""
        SHOW FILE FORMATS LIKE '{trips_file_format_name}'
        """
_ = show_sql_df(query, cur, True)

### Trips Database Table

In [None]:
# Drop the trips table; IF EXISTS keeps this cell re-runnable
query = f"""
        DROP TABLE IF EXISTS {trips_table_name}
        """
# Assign to _ so the notebook does not echo the call's return value
_ = cur.execute(query)

In [None]:
%%time
# List any table still carrying this name; show_sql_df displays nothing when
# the statement returns no rows
query = f"""
        SHOW TABLES LIKE '{trips_table_name}'
        """
_ = show_sql_df(query, cur, True)

In [None]:
# Close the cursor and the trips database connection
cur.close()
conn.close()

### Station Statistics Database Table

In [None]:
# Open a new connection, this time to the station statistics database
conn = snowflake.connector.connect(**snowflake_station_stats_dict)
cur = conn.cursor()

In [None]:
# Drop the station statistics table. IF EXISTS matches the other table, stage
# and file format drops in this notebook, so re-running the clean-up after the
# table is already gone does not raise.
query = f"""
        DROP TABLE IF EXISTS {station_stats_table_name}
        """
# Assign to _ so the notebook does not echo the call's return value
_ = cur.execute(query)

In [None]:
%%time
# List any table still carrying this name; show_sql_df displays nothing when
# the statement returns no rows
query = f"""
        SHOW TABLES LIKE '{station_stats_table_name}'
        """
_ = show_sql_df(query, cur, True)

In [None]:
# Close the cursor and the station statistics database connection
cur.close()
conn.close()

### Databases

In [None]:
# Connect without selecting a database, since the databases themselves are dropped next
conn = snowflake.connector.connect(**snowflake_dict_no_db)
cur = conn.cursor()

In [None]:
# Drop both databases. IF EXISTS keeps the clean-up re-runnable once a database
# is already gone, matching the table, stage and file format drops in this notebook.
for database_name in [trips_db_name, stations_db_name]:
    if not database_name:
        # e.g. DB_NAME is unset: fail loudly rather than let IF EXISTS turn a
        # misconfiguration into a silent no-op
        raise ValueError("Database name is not set; check the DB_NAME variable")
    query = f"""
            DROP DATABASE IF EXISTS {database_name}
            """
    # Assign to _ so the notebook does not echo the call's return value
    _ = cur.execute(query)

In [None]:
%%time
# List the databases returned by SHOW DATABASES; the result (None when no rows
# come back) is kept for the verification cell below
query = """
        SHOW DATABASES
        """
df_databases = show_sql_df(query, cur, True)

In [None]:
database_list = [trips_db_name, stations_db_name]
# Snowflake stores unquoted identifiers in upper case, so SHOW DATABASES reports
# e.g. "TORBIKESTATIONS". Compare case-insensitively; a case-sensitive match
# against the lower-case names never hits and the check would pass vacuously.
dropped_names = {name.lower() for name in database_list if name}
# show_sql_df returns None when the statement yields no rows, in which case no
# database is left to check
if df_databases is not None:
    remaining = df_databases.loc[
        df_databases["name"].str.lower().isin(dropped_names), "name"
    ].tolist()
    assert not remaining, f"Databases still present: {remaining}"

In [None]:
# Close the cursor and the database-less connection
cur.close()
conn.close()

## Delete Local Data Files

### Raw Bikeshare Trips Data Files

In [None]:
# Remove every CSV file that sits directly inside data/raw
raw_csv_files = glob("data/raw/*.csv")
for f in raw_csv_files:
    os.remove(f)

### Raw Files for Supplementary Datasets

In [None]:
# Remove every sub-directory of data/raw together with its contents;
# plain files matched by the glob are left alone
raw_data_dirs = glob("data/raw/*")
for pdir in raw_data_dirs:
    if not os.path.isdir(pdir):
        continue
    shutil.rmtree(pdir)