In [2]:
from snowflake.sqlalchemy import URL
from sqlalchemy import create_engine, text
from dotenv import load_dotenv
import os

# Load environment variables from the .env file two directories up.
# NOTE(review): presumably this is where SNOWFLAKE_DB_PASSWORD (read further
# down via os.getenv) comes from — confirm.
load_dotenv('../../.env')

def create_snowflake_conn(account, user, password, database, schema, role, warehouse):
    """
    Build a SQLAlchemy engine for Snowflake.

    The seven arguments are forwarded unchanged, by keyword, to
    ``snowflake.sqlalchemy.URL``; the resulting URL is handed to
    ``create_engine`` and that engine is returned.
    """
    connection_settings = dict(
        account=account,
        user=user,
        password=password,
        database=database,
        schema=schema,
        role=role,
        warehouse=warehouse,
    )
    snowflake_url = URL(**connection_settings)
    return create_engine(snowflake_url)

In [94]:
# Read the password from the environment and fail fast with a clear message
# when it is missing, instead of passing None into the connection URL.
snowflake_password = os.getenv("SNOWFLAKE_DB_PASSWORD")
if not snowflake_password:
    raise RuntimeError(
        "SNOWFLAKE_DB_PASSWORD is not set; define it in the environment or in "
        "the .env file loaded at the top of this notebook."
    )

# Keyword arguments for create_snowflake_conn, one key per parameter.
snow_params = {
    "account" : 'QYKNAZY-GD18580',
    "database" : 'crime_db',
    "schema" : 'PUBLIC',
    "role" : 'ACCOUNTADMIN',
    "warehouse" : 'crime_wh', # Can fetch dynamically when terraform provisions it
    "user" : 'hyderreza',
    "password" : snowflake_password
}

snow_engine = create_snowflake_conn(**snow_params)
# Bare last expression so the notebook displays the engine repr.
snow_engine

Engine(snowflake://hyderreza:***@QYKNAZY-GD18580/crime_db/PUBLIC?role=ACCOUNTADMIN&warehouse=crime_wh)

In [96]:
# Check for the following: current db, wh and role.
# Statements are wrapped in text() because SQLAlchemy 2.0 no longer accepts
# plain strings in Connection.execute().
with snow_engine.connect() as conn:
    conn.execute(text('USE DATABASE "crime_db"'))
    conn.execute(text('USE WAREHOUSE "crime_wh"'))
    st = 'SELECT CURRENT_DATABASE(), CURRENT_WAREHOUSE(), CURRENT_ROLE()'
    res = conn.execute(text(st)).fetchall()
    for item in res:
        print(item)

('crime_db', 'crime_wh', 'ACCOUNTADMIN')


In [100]:
# Create pipeline_logs table
def create_logs_table(engine):
    """
    Create the ``pipeline_logs`` table if it does not already exist.

    Parameters
    ----------
    engine : SQLAlchemy engine
        Engine connected to Snowflake. The function opens an
        ``engine.begin()`` block on it, selects database ``crime_db`` and
        warehouse ``crime_wh``, then runs the DDL.

    Notes
    -----
    * Every statement is wrapped in ``text()``: SQLAlchemy 2.0 rejects plain
      strings passed to ``Connection.execute()``.
    * ``IF NOT EXISTS`` makes the cell safe to re-run. An existing table is
      left untouched, so drop it first when the column list changes.
    """
    ddl = """
        CREATE TABLE IF NOT EXISTS pipeline_logs (
            run_id INTEGER AUTOINCREMENT PRIMARY KEY,
            ingested_at DATE,
            source_updated_on DATE,
            start_time TIME,
            end_time TIME,
            status VARCHAR(10),
            mode VARCHAR(10),
            config TEXT
        )
    """

    with engine.begin() as conn:
        conn.execute(text('USE DATABASE "crime_db"'))
        conn.execute(text('USE WAREHOUSE "crime_wh"'))
        conn.execute(text(ddl))

# Run the pipeline_logs DDL against the Snowflake engine created earlier in this notebook.
create_logs_table(snow_engine)

In [None]:
# Create crime table

def create_crime_table(engine):
    """
    Create the ``crime`` table if it does not already exist.

    Parameters
    ----------
    engine : SQLAlchemy engine
        Engine connected to Snowflake. The function opens an
        ``engine.begin()`` block on it, selects database ``crime_db`` and
        warehouse ``crime_wh``, then runs the DDL.

    Notes
    -----
    * Every statement is wrapped in ``text()``: SQLAlchemy 2.0 rejects plain
      strings passed to ``Connection.execute()``.
    * ``IF NOT EXISTS`` makes the cell safe to re-run. An existing table is
      left untouched, so drop it first when the column list changes.
    """
    # NOTE(review): `case` is a Snowflake keyword; confirm the unquoted column
    # name is accepted here and can be referenced in later queries.
    ddl = """
        CREATE TABLE IF NOT EXISTS crime (
            crime_id VARCHAR PRIMARY KEY,
            case VARCHAR,
            date_of_occurrence TIMESTAMP_TZ,
            block VARCHAR,
            iucr VARCHAR,
            primary_description VARCHAR,
            secondary_description VARCHAR,
            location_description VARCHAR,
            arrest VARCHAR,
            domestic VARCHAR,
            beat INTEGER,
            district INTEGER,
            ward INTEGER,
            community_area INTEGER,
            fbi_code VARCHAR,
            x_coordinate INTEGER,
            y_coordinate INTEGER,
            latitude FLOAT,
            longitude FLOAT,
            source_updated_on TIMESTAMP_TZ
        )
    """

    with engine.begin() as conn:
        conn.execute(text('USE DATABASE "crime_db"'))
        conn.execute(text('USE WAREHOUSE "crime_wh"'))
        conn.execute(text(ddl))
        

In [109]:
# Scratch cell: display the engine's URL object (the password shows as *** in the repr).
# snow_engine.url.database.split("/")
snow_engine.url

snowflake://hyderreza:***@QYKNAZY-GD18580/crime_db/PUBLIC?role=ACCOUNTADMIN&warehouse=crime_wh

In [111]:
def create_date_table(engine):
    """
    Create the ``date`` table if it does not already exist.

    Parameters
    ----------
    engine : SQLAlchemy engine
        Engine connected to Snowflake. The function opens an
        ``engine.begin()`` block on it, selects database ``crime_db`` and
        warehouse ``crime_wh``, then runs the DDL.

    Notes
    -----
    * Every statement is wrapped in ``text()``: SQLAlchemy 2.0 rejects plain
      strings passed to ``Connection.execute()``.
    * ``IF NOT EXISTS`` makes the cell safe to re-run. An existing table is
      left untouched, so drop it first when the column list changes.
    """
    ddl = """
        CREATE TABLE IF NOT EXISTS date (
            date DATE PRIMARY KEY,
            day INTEGER,
            month INTEGER,
            month_name VARCHAR,
            year INTEGER,
            day_of_week INTEGER,
            day_of_week_name VARCHAR,
            holiday_name VARCHAR
        )
    """

    with engine.begin() as conn:
        conn.execute(text('USE DATABASE "crime_db"'))
        conn.execute(text('USE WAREHOUSE "crime_wh"'))
        conn.execute(text(ddl))
# Run the date-table DDL against the Snowflake engine created earlier in this notebook.
create_date_table(snow_engine)

In [99]:
# Drop table
# Removes pipeline_logs if present (IF EXISTS makes this a no-op otherwise).
# The statement is wrapped in text() because SQLAlchemy 2.0 no longer accepts
# plain strings in Connection.execute().
with snow_engine.connect() as conn:
    conn.execute(text('DROP TABLE IF EXISTS "crime_db".public.pipeline_logs'))