# Connect to Snowflake

In [17]:
from dotenv import load_dotenv
load_dotenv()     # loads keys into os.environ so the rest of your code sees them

True

In [18]:
# authenticate into Snowflake
from snowflake.snowpark import Session
import os
connection_parameters = {
    "account": os.getenv('SNOWFLAKE_ACCOUNT'),
    "user": os.getenv('SNOWFLAKE_USER'),
    "password": os.getenv('SNOWFLAKE_PASSWORD'),
    "role": os.getenv('SNOWFLAKE_ROLE'),
    "warehouse": os.getenv('SNOWFLAKE_WAREHOUSE'),
    "database": os.getenv('SNOWFLAKE_DATABASE'),
    "schema": os.getenv('SNOWFLAKE_SCHEMA')
}
session = Session.builder.configs(connection_parameters).create()

In [19]:
# check connection has been successful
print("Session Current Account:", session.get_current_account())

Session Current Account: "WEVIRIP-NA38028"


In [20]:
session.use_database("SNOWPARK_DEFINITIVE_GUIDE"),
session.use_schema("MY_SCHEMA")

purchase_history = session.table("PURCHASE_HISTORY")
campaign_info = session.table("CAMPAIGN_INFO")
complain_info = session.table("COMPLAINT_INFO")
marketing_additional = session.table("MARKETING_ADDITIONAL")

# Data Engineering Pipelines

## Step 1 - Joining Purchase History Table with Campaign Info

In [21]:
# join the purchase history with the campaign information
def combine_campaign_table(purchase_history, campaign_info):
    purchase_campaign = purchase_history.join(
        campaign_info, \
        purchase_history.ID == campaign_info.ID, \
        lsuffix='_left', rsuffix='_right'
    )
    purchase_campaign = purchase_campaign.drop('ID_RIGHT')
    return purchase_campaign

## Step 2 - Joining Purchase Campaign with Complain Info Table

In [22]:
# join the purchase campaign with the complaint information 
# using the same ID column and then create a purchase_campaign_complain DataFrame
def combine_complain_table(purchase_campaign, complain_info):
    purchase_campaign_complain = purchase_campaign.join(
        complain_info, \
        purchase_campaign['ID_LEFT'] == complain_info.ID
    )
    purchase_campaign_complain = purchase_campaign_complain.drop('ID_LEFT')
    return purchase_campaign_complain

## Step 3 - Union Additional Marketing Table with Purchase Campaign Complain Data

In [23]:
# marketing table is created by the union of the data between the purchase complaint and the marketing table
def union_marketing_additional_table(purchase_campaign_complain,marketing_additional):
    final_marketing_table = purchase_campaign_complain.union_by_name(marketing_additional)
    return final_marketing_table

## Stored Procedure - Data Preparation

In [24]:
from snowflake.snowpark.functions import sproc
import snowflake

def data_prep(session: Session) -> str:
    #### Loading Required Tables
    purchase_history = session.table("PURCHASE_HISTORY")
    campaign_info = session.table("CAMPAIGN_INFO")
    complain_info = session.table("COMPLAINT_INFO")
    marketing_additional = session.table("MARKETING_ADDITIONAL")

    #### Calling Step 1
    purchase_campaign = combine_campaign_table(purchase_history, campaign_info)

    #### Calling Step 2
    purchase_campaign_complain = combine_complain_table(purchase_campaign, complain_info)

    #### Calling Step 3
    final_marketing_data = union_marketing_additional_table(purchase_campaign_complain, marketing_additional)

    #### Writing Combined Data to New Table
    final_marketing_data.write.save_as_table('FINAL_MARKETING_DATA')
    return 'LOADED FINAL MARKETING DATA TABLE'

# Create an instance of StoredProcedure using the sproc() function
from snowflake.snowpark.types import IntegerType, StringType
data_prep_sproc = sproc(
                            func=data_prep,\
                            replace=True,\
                            return_type = StringType(),\
                            stage_location='@my_stage',\
                            packages=['snowflake-snowpark-python']
                        )

SnowparkSessionException: (1409): More than one active session is detected. When you call function 'udf' or use decorator '@udf', you must specify the 'session' parameter if you created multiple sessions.Alternatively, you can use 'session.udf.register' to register UDFs

# Close Snowflake Session

In [None]:
# always close a session
session.close()