In [1]:
%%script echo skipping
%pip install snowflake-connector-python
%pip install pyarrow
%pip install pandas

skipping


In [2]:
import os
import pandas as pd
import snowflake.connector
from dotenv import load_dotenv

  warn_incompatible_dep(


In [3]:
load_dotenv()

True

In [4]:
SNOW_USER = os.environ['SNOW_USER']
ACCOUNT = os.environ['ACCOUNT']
PASSWORD = os.environ['PASSWORD']
WAREHOUSE = os.environ['WAREHOUSE']
DATABASE = os.environ['DATABASE']
SIGMA_SCHEMA = os.environ['SIGMA_SCHEMA']
STAGING_SCHEMA = os.environ['STAGING_SCHEMA']


In [5]:
def connect_to_snowflake() -> snowflake.connector.cursor:
    """
    Connecting to the snowflake database
    """
    conn = snowflake.connector.connect(
        user= SNOW_USER,
        account= ACCOUNT,
        password= PASSWORD,
        warehouse= WAREHOUSE,
        database= DATABASE,
    )
    return conn.cursor()

In [6]:
def show_schemas(cs:snowflake.connector.cursor) -> list:
    """ 
    Check which schemas are in our database
    """
    return cs.execute("SHOW SCHEMAS;").fetchall()

In [7]:
def create_staging_schema(cs:snowflake.connector.cursor):
    """ 
    Adds the staging schema to the database
    """
    cs.execute(f"CREATE SCHEMA IF NOT EXISTS {STAGING_SCHEMA}")


In [8]:
def use_staging_schema(cs:snowflake.connector.cursor):
    """ 
    Ensure that cursor is using the staging schema
    """
    cs.execute(f"USE SCHEMA {STAGING_SCHEMA}")

In [9]:
def add_test_table(cs:snowflake.connector.cursor):
    """ 
    Adding test tables to schema to make sure we are using staging schema
    """
    cs.execute(
        "CREATE OR REPLACE TABLE test_table(col1 integer, col2 string)")
    cs.execute(
        "INSERT INTO test_table(col1, col2) VALUES(123, 'xyz'), (456, 'zyx')")

In [10]:
def show_tables(cs:snowflake.connector.cursor, schema) -> list:
    """
    Check which tables are in the current schema the given cursor is using
    """
    return cs.execute(f"SHOW TABLES IN {schema}").fetchall()

In [11]:
def fetch_test_data(cs:snowflake.connector.cursor) -> pd.DataFrame:
    """ 
    Test that insert worked and that we can start work with pandas dataframes
    """
    return cs.execute("SELECT * FROM test_table").fetch_pandas_all()

In [12]:
cs = connect_to_snowflake()
show_schemas(cs)

[(datetime.datetime(2022, 10, 4, 5, 16, 25, 303000, tzinfo=<DstTzInfo 'America/Los_Angeles' PDT-1 day, 17:00:00 DST>),
  'INFORMATION_SCHEMA',
  'N',
  'N',
  'DELATON',
  '',
  'Views describing the contents of schemas in this database',
  '',
  '1'),
 (datetime.datetime(2022, 9, 29, 1, 9, 6, 401000, tzinfo=<DstTzInfo 'America/Los_Angeles' PDT-1 day, 17:00:00 DST>),
  'PUBLIC',
  'N',
  'N',
  'DELATON',
  'SYSADMIN',
  '',
  '',
  '1'),
 (datetime.datetime(2022, 9, 29, 1, 9, 44, 900000, tzinfo=<DstTzInfo 'America/Los_Angeles' PDT-1 day, 17:00:00 DST>),
  'TEC_BIKE_PRODUCTION',
  'N',
  'N',
  'DELATON',
  'SYSADMIN',
  '',
  '',
  '1'),
 (datetime.datetime(2022, 10, 4, 4, 39, 6, 77000, tzinfo=<DstTzInfo 'America/Los_Angeles' PDT-1 day, 17:00:00 DST>),
  'YUSRA_STORIES_STAGING',
  'N',
  'N',
  'DELATON',
  'SYSADMIN',
  '',
  '',
  '1'),
 (datetime.datetime(2022, 10, 4, 3, 4, 46, 918000, tzinfo=<DstTzInfo 'America/Los_Angeles' PDT-1 day, 17:00:00 DST>),
  'ZOOKEEPERS_BATCH_PRODUCTION

In [13]:
create_staging_schema(cs)
use_staging_schema(cs)

In [14]:
show_tables(cs, STAGING_SCHEMA)

[(datetime.datetime(2022, 10, 4, 4, 39, 6, 220000, tzinfo=<DstTzInfo 'America/Los_Angeles' PDT-1 day, 17:00:00 DST>),
  'TEST_TABLE',
  'DELATON',
  'YUSRA_STORIES_STAGING',
  'TABLE',
  '',
  '',
  2,
  1024,
  'SYSADMIN',
  '1',
  'OFF',
  'OFF',
  'N')]

In [15]:
test_df = fetch_test_data(cs)
test_df

Unnamed: 0,COL1,COL2
0,123,test string1
1,456,test string2
