# Delete Resources and Data

In [1]:
%load_ext lab_black
%load_ext autoreload
%autoreload 2

In [2]:
import configparser
import os
import shutil
from glob import glob
from typing import Dict, Union

import pandas as pd
import snowflake.connector

## About

In this notebook, we will perform the following cleanup tasks:
- delete the database tables
- delete all supporting resources that were created (internal stage, file format)
- delete the database
- delete all locally saved processed data
- delete all locally downloaded raw data

## User Inputs

In [3]:
# Names of the database tables to delete
table_names = ["inspections", "addressinfo"]

# Names of the internal stage and file format to delete
stage_name = "processed_dinesafe_data"
file_format_name = "COMMACOLSEP_ONEHEADROW"

# Local directories whose contents are deleted at the end of the notebook
raw_data_dir = "data/raw"
processed_data_dir = "data/processed"

# "yes": read the Snowflake credentials from environment variables;
# any other value: read them from ../sql.ini
ci_run = "no"

In [4]:
# The database name is the same regardless of where the credentials come from.
DB_NAME = "dinesafe"

if ci_run == "yes":
    # CI run: credentials are supplied through environment variables.
    ACCOUNT = os.getenv("SNOWFLAKE_ACCOUNT")
    USER = os.getenv("SNOWFLAKE_USER")
    PASS = os.getenv("SNOWFLAKE_PASS")
    WAREHOUSE = os.getenv("SNOWFLAKE_WAREHOUSE")
    # Not among the validated settings below: none of the connection settings
    # built in this notebook read DB_SCHEMA.
    DB_SCHEMA = os.getenv("SNOWFLAKE_DB_SCHEMA")

    # os.getenv() returns None for unset variables; fail here with a clear
    # message instead of passing None on to the Snowflake connector.
    missing_env_vars = [
        env_name
        for env_name, env_value in [
            ("SNOWFLAKE_ACCOUNT", ACCOUNT),
            ("SNOWFLAKE_USER", USER),
            ("SNOWFLAKE_PASS", PASS),
            ("SNOWFLAKE_WAREHOUSE", WAREHOUSE),
        ]
        if not env_value
    ]
    if missing_env_vars:
        raise EnvironmentError(
            "Missing required environment variable(s): "
            + ", ".join(missing_env_vars)
        )
else:
    # Local run: credentials are read from the [default] section of ../sql.ini.
    config = configparser.ConfigParser()
    # ConfigParser.read() skips files it cannot open and returns the list of
    # files it actually parsed, so an empty result means the file is missing.
    if not config.read("../sql.ini"):
        raise FileNotFoundError(
            "Could not read Snowflake credentials from ../sql.ini"
        )
    default_cfg = config["default"]
    ACCOUNT = default_cfg["SNOWFLAKE_ACCOUNT"]
    USER = default_cfg["SNOWFLAKE_USER"]
    PASS = default_cfg["SNOWFLAKE_PASS"]
    WAREHOUSE = default_cfg["SNOWFLAKE_WAREHOUSE"]
    DB_SCHEMA = default_cfg["SNOWFLAKE_DB_SCHEMA"]

In [5]:
# Connection settings that do not select a database; used when the database
# itself is being dropped.
connector_dict_no_db = {
    "account": ACCOUNT,
    "user": USER,
    "password": PASS,
    "warehouse": WAREHOUSE,
    "role": "sysadmin",
}
# The same settings plus the database and schema that hold the tables,
# stage and file format.
connector_dict = {
    **connector_dict_no_db,
    "database": DB_NAME,
    "schema": "public",
}

In [6]:
def show_sql_df(
    query: str,
    cursor,
    cnx=None,
    table_output: bool = False,
    use_manual_approach: bool = False,
) -> Union[None, pd.DataFrame]:
    """Execute a query and optionally display and return its result.

    Parameters
    ----------
    query : str
        SQL statement passed to ``cursor.execute``.
    cursor
        Cursor object used to execute the query and fetch its rows.
    cnx
        Optional connection; when truthy, ``cnx.commit()`` is called right
        after the query is executed.
    table_output : bool
        When False, nothing is fetched and an empty DataFrame is returned.
    use_manual_approach : bool
        When True, rows come from ``cursor.fetchall()`` and the lower-cased
        column names from ``cursor.description``; otherwise the result comes
        from ``cursor.fetch_pandas_all()``.

    Returns
    -------
    pd.DataFrame
        The query result. An empty, column-less DataFrame is returned when
        ``table_output`` is False or when the manual fetch yields no rows.
    """
    cursor.execute(query)
    if cnx:
        cnx.commit()

    # Nothing to fetch or show unless tabular output was requested
    if not table_output:
        return pd.DataFrame()

    if use_manual_approach:
        column_names = [col_desc[0].lower() for col_desc in cursor.description]
        rows = cursor.fetchall()
        if not rows:
            return pd.DataFrame()
        result = pd.DataFrame.from_records(rows, columns=column_names)
    else:
        result = cursor.fetch_pandas_all()

    # Widen the display limits only while rendering this result
    with pd.option_context("display.max_columns", 200, "display.max_colwidth", 200):
        display(result)
    return result

In [7]:
def delete_table(connector_dict: Dict, table_name: str) -> None:
    """Truncate and drop a table, then verify that it no longer exists.

    Parameters
    ----------
    connector_dict : Dict
        Keyword arguments passed to ``snowflake.connector.connect``.
    table_name : str
        Name of the table to delete.

    Raises
    ------
    AssertionError
        If ``SHOW TABLES`` still lists the table after the drop.
    """
    conn = snowflake.connector.connect(**connector_dict)
    try:
        cur = conn.cursor()
        try:
            for query in [
                f"TRUNCATE TABLE IF EXISTS {table_name}",
                f"DROP TABLE IF EXISTS {table_name}",
            ]:
                cur.execute(query)
            # Match the exact name: a '%name%' pattern would also match
            # unrelated tables whose names merely contain this one and make
            # the check fail even though the drop succeeded.
            # NOTE: '_' in the name still acts as a single-character wildcard.
            query = f"""
            SHOW TABLES LIKE '{table_name}'
            """
            df = show_sql_df(query, cur, conn, True, True)
            assert df.empty, f"Table {table_name} still exists after DROP"
            print(f"Deleted table {table_name}")
        finally:
            # Release the cursor even when a statement or the check fails
            cur.close()
    finally:
        conn.close()


def delete_stage(connector_dict: Dict, stage_name: str) -> None:
    """Drop a stage, then verify that it no longer exists.

    Parameters
    ----------
    connector_dict : Dict
        Keyword arguments passed to ``snowflake.connector.connect``.
    stage_name : str
        Name of the stage to delete.

    Raises
    ------
    AssertionError
        If ``SHOW STAGES`` still lists the stage after the drop.
    """
    conn = snowflake.connector.connect(**connector_dict)
    try:
        cur = conn.cursor()
        try:
            query = f"""
            DROP STAGE IF EXISTS {stage_name}
            """
            cur.execute(query)
            # Match the exact name: a '%name%' pattern would also match
            # unrelated stages whose names merely contain this one and make
            # the check fail even though the drop succeeded.
            # NOTE: '_' in the name still acts as a single-character wildcard.
            query = f"""
            SHOW STAGES LIKE '{stage_name}'
            """
            df = show_sql_df(query, cur, conn, True, True)
            assert df.empty, f"Stage {stage_name} still exists after DROP"
            print(f"Deleted stage {stage_name}")
        finally:
            # Release the cursor even when a statement or the check fails
            cur.close()
    finally:
        conn.close()


def delete_file_format(connector_dict: Dict, file_format_name: str) -> None:
    """Drop a file format, then verify that it no longer exists.

    Parameters
    ----------
    connector_dict : Dict
        Keyword arguments passed to ``snowflake.connector.connect``.
    file_format_name : str
        Name of the file format to delete.

    Raises
    ------
    AssertionError
        If ``SHOW FILE FORMATS`` still lists the file format after the drop.
    """
    conn = snowflake.connector.connect(**connector_dict)
    try:
        cur = conn.cursor()
        try:
            query = f"""
            DROP FILE FORMAT IF EXISTS {file_format_name}
            """
            cur.execute(query)
            # Match the exact name: a '%name%' pattern would also match
            # unrelated file formats whose names merely contain this one and
            # make the check fail even though the drop succeeded.
            # NOTE: '_' in the name still acts as a single-character wildcard.
            query = f"""
            SHOW FILE FORMATS LIKE '{file_format_name}'
            """
            df = show_sql_df(query, cur, conn, True, True)
            assert (
                df.empty
            ), f"File format {file_format_name} still exists after DROP"
            print(f"Deleted file format {file_format_name}")
        finally:
            # Release the cursor even when a statement or the check fails
            cur.close()
    finally:
        conn.close()


def delete_database(connector_dict_no_db: Dict, database_name: str) -> None:
    """Drop a database, then verify that it no longer exists.

    Parameters
    ----------
    connector_dict_no_db : Dict
        Keyword arguments passed to ``snowflake.connector.connect``.
    database_name : str
        Name of the database to delete.

    Raises
    ------
    AssertionError
        If ``SHOW DATABASES`` still lists the database after the drop.
    """
    conn = snowflake.connector.connect(**connector_dict_no_db)
    try:
        cur = conn.cursor()
        try:
            query = f"""
            DROP DATABASE IF EXISTS {database_name}
            """
            cur.execute(query)
            # Match the exact name: a '%name%' pattern would also match
            # unrelated databases whose names merely contain this one and
            # make the check fail even though the drop succeeded.
            # NOTE: '_' in the name still acts as a single-character wildcard.
            query = f"""
            SHOW DATABASES LIKE '{database_name}'
            """
            df = show_sql_df(query, cur, conn, True, True)
            assert df.empty, f"Database {database_name} still exists after DROP"
            print(f"Deleted database {database_name}")
        finally:
            # Release the cursor even when a statement or the check fails
            cur.close()
    finally:
        conn.close()


def delete_local_processed_data(processed_data_dir: str) -> None:
    """Remove every ``*.csv`` file located directly in ``processed_data_dir``.

    Entries that do not match ``*.csv`` are left in place. A confirmation
    message is printed once all matching files have been removed.
    """
    for csv_path in glob(f"{processed_data_dir}/*.csv"):
        os.remove(csv_path)
    print(f"Deleted local processed data from {processed_data_dir}")


def delete_local_raw_data(raw_data_dir: str) -> None:
    """Recursively remove every sub-directory found in ``raw_data_dir``.

    Only directories are removed; plain files located directly in
    ``raw_data_dir`` are left untouched. A confirmation message is printed
    once all sub-directories have been removed.
    """
    for raw_sub_dir in filter(os.path.isdir, glob(f"{raw_data_dir}/*")):
        shutil.rmtree(raw_sub_dir)
    print(f"Deleted local raw data from {raw_data_dir}")

## Perform Cleanup

### Delete Tables

In [8]:
%%time
# Remove each configured table in turn
for table_name in table_names:
    delete_table(connector_dict=connector_dict, table_name=table_name)

Deleted table inspections
Deleted table addressinfo
CPU times: user 322 ms, sys: 7.49 ms, total: 329 ms
Wall time: 2.25 s


### Delete Internal Stage

In [9]:
delete_stage(connector_dict=connector_dict, stage_name=stage_name)

Deleted stage processed_dinesafe_data


### Delete File Format

In [10]:
delete_file_format(connector_dict=connector_dict, file_format_name=file_format_name)

Deleted file format COMMACOLSEP_ONEHEADROW


### Delete Database

In [11]:
delete_database(connector_dict_no_db=connector_dict_no_db, database_name=DB_NAME)

Deleted database dinesafe


### Delete Locally Stored Processed Data

In [12]:
delete_local_processed_data(processed_data_dir=processed_data_dir)

Deleted local processed data from data/processed


### Delete Locally Stored Raw Data

In [13]:
delete_local_raw_data(raw_data_dir=raw_data_dir)

Deleted local raw data from data/raw
