In [1]:
import sys
import logging

from snowflake.snowpark import Session, DataFrame
from snowflake.snowpark.types import StructType, StringType, StructField, StringType,LongType,DecimalType,DateType,TimestampType
from snowflake.snowpark.functions import col,lit,row_number, rank
from snowflake.snowpark import Window

In [2]:
# initiate logging at info level
logging.basicConfig(stream=sys.stdout, level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', datefmt='%I:%M:%S')

In [3]:
# snowpark session
def get_snowpark_session() -> Session:
    connection_parameters = {
       "ACCOUNT":"account",
        "USER":"user",
        "PASSWORD":"password",
        "ROLE":"SYSADMIN",
        "DATABASE":"SALES_DWH",
        "SCHEMA":"SOURCE",
        "WAREHOUSE":"SNOWPARK_ETL_WH"
    }
    # creating snowflake session object
    return Session.builder.configs(connection_parameters).create()

   

In [4]:
session = get_snowpark_session()

context_df = session.sql("select current_role(), current_database(), current_schema(), current_warehouse()")
context_df.show(2)

05:01:28 - INFO - Snowflake Connector for Python Version: 3.7.0, Python Version: 3.8.8, Platform: macOS-10.16-x86_64-i386-64bit
05:01:28 - INFO - This connection is in OCSP Fail Open Mode. TLS Certificates would be checked for validity and revocation status. Any other Certificate Revocation related exceptions or OCSP Responder failures would be disregarded in favor of connectivity.
05:01:28 - INFO - Snowpark Session information: 
"version" : 1.13.0,
"python.version" : 3.8.8,
"python.connector.version" : 3.7.0,
"python.connector.session.id" : 1276855122583914,
"os.name" : Darwin

05:01:28 - INFO - query: [select current_role(), current_database(), current_schema(), current_warehouse()...]
05:01:28 - INFO - query execution done
05:01:28 - INFO - Number of results in first chunk: 0
05:01:28 - INFO - query: [SELECT  *  FROM (select current_role(), current_database(), current_schema(), cu...]
05:01:28 - INFO - query execution done
05:01:28 - INFO - Number of results in first chunk: 1
------

In [7]:

def ingest_in_sales(session)-> None:
    session.sql(" \
            copy into SALES_DWH.SOURCE.IN_SALES_ORDER from ( \
            select \
            IN_SALES_ORDER_SEQ.nextval, \
            t.$1::text as order_id, \
            t.$2::text as customer_name, \
            t.$3::text as mobile_key,\
            t.$4::number as order_quantity, \
            t.$5::number as unit_price, \
            t.$6::number as order_valaue,  \
            t.$7::text as promotion_code , \
            t.$8::number(10,2)  as final_order_amount,\
            t.$9::number(10,2) as tax_amount,\
            t.$10::date as order_dt,\
            t.$11::text as payment_status,\
            t.$12::text as shipping_status,\
            t.$13::text as payment_method,\
            t.$14::text as payment_provider,\
            t.$15::text as mobile,\
            t.$16::text as shipping_address,\
            metadata$filename as stg_file_name,\
            metadata$file_row_number as stg_row_numer,\
            metadata$file_last_modified as \"STG_LAST_MODIFIED\"\
            from \
            @SALES_DWH.SOURCE.my_internal_stg/sales/source=IN/format=csv/ \
            (                                                             \
                file_format => 'SALES_DWH.COMMON.MY_CSV_FORMAT'           \
            ) t  )  on_error = 'Continue'     \
            "
            ).collect()


In [8]:
def ingest_fr_sales(session)-> None:
    session.sql(' \
        copy into SALES_DWH.SOURCE.FR_SALES_ORDER                               \
        from                                                    \
        (                                                       \
            select                                              \
            sales_dwh.source.fr_sales_order_seq.nextval,         \
            $1:"Order ID"::text as orde_id,                   \
            $1:"Customer Name"::text as customer_name,          \
            $1:"Mobile Model"::text as mobile_key,              \
            to_number($1:"Quantity") as quantity,               \
            to_number($1:"Price per Unit") as unit_price,       \
            to_decimal($1:"Total Price") as total_price,        \
            $1:"Promotion Code"::text as promotion_code,        \
            $1:"Order Amount"::number(10,2) as order_amount,    \
            to_decimal($1:"Tax") as tax,                        \
            $1:"Order Date"::date as order_dt,                  \
            $1:"Payment Status"::text as payment_status,        \
            $1:"Shipping Status"::text as shipping_status,      \
            $1:"Payment Method"::text as payment_method,        \
            $1:"Payment Provider"::text as payment_provider,    \
            $1:"Phone"::text as phone,                          \
            $1:"Delivery Address"::text as shipping_address ,    \
            metadata$filename as stg_file_name,\
            metadata$file_row_number as stg_row_numer,\
            metadata$file_last_modified as \"STG_LAST_MODIFIED\" \
            from                                                \
            @SALES_DWH.SOURCE.my_internal_stg/sales/source=FR/format=json/\
            (file_format => SALES_DWH.COMMON.MY_JSON_FORMAT)\
             ) on_error=continue\
        '
        ).collect()

In [6]:
def ingest_us_sales(session)-> None:
    session.sql(' \
            copy into SALES_DWH.SOURCE.US_SALES_ORDER               \
            from                                    \
            (                                       \
                select                              \
                us_sales_order_seq.nextval, \
                $1:"Order ID"::text as orde_id,   \
                $1:"Customer Name"::text as customer_name,\
                $1:"Mobile Model"::text as mobile_key,\
                to_number($1:"Quantity") as quantity,\
                to_number($1:"Price per Unit") as unit_price,\
                to_decimal($1:"Total Price") as total_price,\
                $1:"Promotion Code"::text as promotion_code,\
                $1:"Order Amount"::number(10,2) as order_amount,\
                to_decimal($1:"Tax") as tax,\
                $1:"Order Date"::date as order_dt,\
                $1:"Payment Status"::text as payment_status,\
                $1:"Shipping Status"::text as shipping_status,\
                $1:"Payment Method"::text as payment_method,\
                $1:"Payment Provider"::text as payment_provider,\
                $1:"Phone"::text as phone,\
                $1:"Delivery Address"::text as shipping_address,\
                metadata$filename as stg_file_name,\
                metadata$file_row_number as stg_row_numer,\
                metadata$file_last_modified as \"STG_LAST_MODIFIED\"\
                from                                \
                    @SALES_DWH.SOURCE.my_internal_stg/sales/source=US/format=parquet/\
                    (file_format => SALES_DWH.COMMON.MY_PARQUET_FORMAT)\
                    ) on_error = continue \
            '
            ).collect()
   

In [9]:
def main():

    #get the session object and get dataframe
    session = get_snowpark_session()

    #ingest in sales data
    ingest_in_sales(session)

    #ingest in sales data
    ingest_us_sales(session) 

    #ingest in sales data
    ingest_fr_sales(session)   

if __name__ == '__main__':
    main()

05:01:43 - INFO - Snowflake Connector for Python Version: 3.7.0, Python Version: 3.8.8, Platform: macOS-10.16-x86_64-i386-64bit
05:01:43 - INFO - This connection is in OCSP Fail Open Mode. TLS Certificates would be checked for validity and revocation status. Any other Certificate Revocation related exceptions or OCSP Responder failures would be disregarded in favor of connectivity.
05:01:44 - INFO - Snowpark Session information: 
"version" : 1.13.0,
"python.version" : 3.8.8,
"python.connector.version" : 3.7.0,
"python.connector.session.id" : 1276855122551114,
"os.name" : Darwin

05:01:44 - INFO - query: [copy into SALES_DWH.SOURCE.IN_SALES_ORDER from (             select             ...]
05:01:45 - INFO - query execution done
05:01:45 - INFO - Number of results in first chunk: 1
05:01:45 - INFO - query: [copy into SALES_DWH.SOURCE.US_SALES_ORDER                           from        ...]
05:01:46 - INFO - query execution done
05:01:46 - INFO - Number of results in first chunk: 1
05:01: