In [103]:
from snowflake.snowpark.session import Session
import snowflake.snowpark.functions as F
import snowflake.snowpark.types as T
from snowflake.snowpark.window import Window
from sklearn import preprocessing # https://github.com/Snowflake-Labs/snowpark-python-demos/tree/main/sp4py_utilities
from snowflake.snowpark.functions import col

import getpass
import pandas as pd
import matplotlib.pyplot as plt

In [104]:
#nj07294.ap-southeast-1

In [105]:
accountname = 'nj07294.ap-southeast-1'
#accountname = getpass.getpass() # ORGNAME-ACCOUNTNAME (separated by minus sign)

In [106]:
username = getpass.getpass()    # SNOWFLAKE-USERNAME

In [107]:
password = getpass.getpass()    # SNOWFLAKE-PASSWORD

In [108]:
connection_parameters = {
    "account": accountname,
    "user": username,
    "password": password,
    "role": "ACCOUNTADMIN",
    "database": "FROSTBYTE_TASTY_BYTES",

    "warehouse": "HOL_WH"
}

session = Session.builder.configs(connection_parameters).create()

# Filter

In [109]:
sdf = session.table('ANALYTICS.ORDERS_V')
df=sdf.filter(col("COUNTRY")=='United States')

In [110]:
df=sdf.filter(col("COUNTRY")=='United States')

In [111]:
df.show()

---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|"DATE"      |"ORDER_ID"  |"TRUCK_ID"  |"ORDER_TS"           |"ORDER_DETAIL_ID"  |"LINE_NUMBER"  |"TRUCK_BRAND_NAME"  |"MENU_TYPE"  |"PRIMARY_CITY"  |"REGION"  |"COUNTRY"      |"FRANCHISE_FLAG"  |"FRANCHISE_ID"  |"FRANCHISEE_FIRST_NAME"  |"FRANCHISEE_LAST_NAME"  |"LOCATION_ID"  |"CUSTOMER_ID"  |"FIRST_NAME"  |"LAST_NAME"  |"E_MAIL"  |"PHONE_NUMBER"  |"CHILDREN_COUNT"  |"GENDER"  |"MARITAL_STATUS"  |"MENU_ITEM_ID"  |"MENU_ITEM_NAME"   

### Extracting weather data

In [112]:
connection_parameters = {
    "account": accountname,
    "user": username,
    "password": password,
    "role": "ACCOUNTADMIN",
    "database": "GLOBAL_WEATHER__CLIMATE_DATA_FOR_BI",

    "warehouse": "HOL_WH"
}

wea_session = Session.builder.configs(connection_parameters).create()
wdf = wea_session.table('STANDARD_TILE.HISTORY_DAY')

In [113]:
wdf.show()


----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

In [114]:
wdf2 = wdf.to_pandas()


In [115]:
wdf2.head()

Unnamed: 0,POSTAL_CODE,COUNTRY,DATE_VALID_STD,DOY_STD,MIN_TEMPERATURE_AIR_2M_F,AVG_TEMPERATURE_AIR_2M_F,MAX_TEMPERATURE_AIR_2M_F,MIN_TEMPERATURE_WETBULB_2M_F,AVG_TEMPERATURE_WETBULB_2M_F,MAX_TEMPERATURE_WETBULB_2M_F,...,TOT_PRECIPITATION_IN,TOT_SNOWFALL_IN,TOT_SNOWDEPTH_IN,MIN_CLOUD_COVER_TOT_PCT,AVG_CLOUD_COVER_TOT_PCT,MAX_CLOUD_COVER_TOT_PCT,MIN_RADIATION_SOLAR_TOTAL_WPM2,AVG_RADIATION_SOLAR_TOTAL_WPM2,MAX_RADIATION_SOLAR_TOTAL_WPM2,TOT_RADIATION_SOLAR_TOTAL_WPM2
0,102103,NG,2021-08-19,231,73.7,77.4,84.6,72.5,73.6,75.1,...,0.16,0.0,0.0,20,88,100,0.0,149.9,668.7,3596.4
1,110054,IN,2021-08-19,231,76.9,87.4,100.1,73.2,76.8,80.9,...,0.01,0.0,0.0,37,77,100,0.0,269.6,927.2,6470.4
2,2044,AU,2021-08-19,231,49.4,59.3,67.4,48.0,53.0,56.3,...,0.0,0.0,0.0,3,46,96,0.0,167.1,688.0,4010.1
3,21745-690,BR,2021-08-19,231,65.7,74.0,86.1,63.6,66.9,71.8,...,0.0,0.0,0.0,0,2,7,0.0,235.5,822.0,5651.3
4,60596,DE,2021-08-19,231,57.9,62.7,66.3,57.2,60.3,62.6,...,0.03,0.0,0.0,75,97,100,0.0,59.9,196.1,1438.4


### Drop columns (like discount amount, etc)

In [116]:
df=df.drop(['ORDER_DISCOUNT_AMOUNT','ORDER_TAX_AMOUNT','ORDER_AMOUNT','PRICE','UNIT_PRICE','QUANTITY','MENU_ITEM_NAME','MENU_ITEM_ID','MARITAL_STATUS','GENDER','CHILDREN_COUNT','PHONE_NUMBER','E_MAIL','LAST_NAME','FIRST_NAME','CUSTOMER_ID','FRANCHISEE_FIRST_NAME','FRANCHISEE_LAST_NAME','FRANCHISE_ID','FRANCHISE_FLAG','LINE_NUMBER','ORDER_ID'])

### Transform order_ts to year, month, day, day of the week, hour, public holiday binary

In [117]:
df=df.withColumn("Month",F.month(df["ORDER_TS"]))
df=df.withColumn("DOW",F.dayofweek(df["ORDER_TS"]))
df=df.withColumn("Day",F.dayofmonth(df["ORDER_TS"]))
df=df.withColumn("Hour",F.hour(df["ORDER_TS"]))
df=df.withColumn("WOM", ((F.dayofmonth(F.col('ORDER_TS')) - 1) / 7 + 1).cast('integer'))

In [118]:
# Create public holiday column binary
public_holidays = [
    {'Month': 7, 'Day': 4, 'DOW': None, 'WOM': None},  # 4th of July
    {'Month': 12, 'Day': 24, 'DOW': None, 'WOM': None},  # Christmas Eve
    {'Month': 12, 'Day': 25, 'DOW': None, 'WOM': None},  # Christmas Day
    {'Month': 10, 'Day': None, 'DOW': 'Monday', 'WOM': 2},  # Columbus Day (second Monday in October)
    {'Month': 6, 'Day': 19, 'DOW': None, 'WOM': None},  # Juneteenth
    {'Month': 9, 'Day': None, 'DOW': 'Monday', 'WOM': 1},  # Labor Day (first Monday in September)
    {'Month': 1, 'Day': None, 'DOW': 'Monday', 'WOM': 3},  # Martin Luther King, Jr. Day (third Monday in January)
    {'Month': 5, 'Day': None, 'DOW': 'Monday', 'WOM': -1},  # Memorial Day (last Monday in May)
    {'Month': 1, 'Day': 1, 'DOW': None, 'WOM': None},  # New Year's Day
    {'Month': 12, 'Day': 31, 'DOW': None, 'WOM': None},  # New Year's Eve
    {'Month': 11, 'Day': None, 'DOW': 'Thursday', 'WOM': 4},  # Thanksgiving Day (fourth Thursday in November)
    {'Month': 11, 'Day': None, 'DOW': 'Wednesday', 'WOM': 4},  # Thanksgiving Eve (fourth Wednesday in November)
    {'Month': 2, 'Day': 14, 'DOW': None, 'WOM': None},  # Valentine's Day
    {'Month': 11, 'Day': 11, 'DOW': None, 'WOM': None},  # Veterans Day
    {'Month': 10, 'Day': 31, 'DOW': None, 'WOM': None},  # Halloween
    {'Month': 3, 'Day': 17, 'DOW': None, 'WOM': None},  # St. Patrick's Day
    {'Month': 11, 'Day': 25, 'DOW': 'Friday', 'WOM': None},  # Black Friday
    {'Month': 12, 'Day': 26, 'DOW': None, 'WOM': None},  # Boxing Day
]

In [119]:
from snowflake.snowpark.functions import udf
from snowflake.snowpark.types import IntegerType
def is_public_holiday(month, day, dow, wom):
    for holiday in public_holidays:
        if holiday['Month'] == month and holiday['DOW'] == dow and holiday['WOM'] == wom:
            if holiday['Day'] is None:
                return True
            elif holiday['Day'] == day:
                return True
    return False

session.sql("USE SCHEMA RAW_POS").collect()
@udf(session=session, name='public_holiday', input_types=[IntegerType(), IntegerType(), IntegerType(), IntegerType()], return_type=IntegerType(), is_permanent=False, replace=True)
def public_holiday(month: int, day: int, dow: int, wom: int) -> int:
    if is_public_holiday(month, day, dow, wom):
        return 1
    else:
        return 0

df=df.withColumn('PUBLIC_HOLIDAY', public_holiday(F.month(F.col('DATE')), F.dayofmonth(F.col('DATE')), F.dayofweek(F.col('DATE')), ((F.col('Day') - 1) / 7 + 1).cast('integer')))

## Grouping

In [120]:
df.show()

--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|"DATE"      |"TRUCK_ID"  |"ORDER_TS"           |"ORDER_DETAIL_ID"  |"TRUCK_BRAND_NAME"  |"MENU_TYPE"  |"PRIMARY_CITY"  |"REGION"  |"COUNTRY"      |"LOCATION_ID"  |"ORDER_TOTAL"  |"MONTH"  |"DOW"  |"DAY"  |"HOUR"  |"WOM"  |"PUBLIC_HOLIDAY"  |
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|2021-09-10  |68          |2021-09-10 11:55:55  |852794036          |Le Coin des Crêpes  |Crepes       |New York City   |New York  |United States  |3161           |60.0000        |9        |5      |10     |11      |2      |0                 |
|2021-09-10  |68          |2

In [121]:
grouped_df = df.groupBy("DATE","TRUCK_ID","MONTH","HOUR","DOW","DAY","Menu_Type","LOCATION_ID","PUBLIC_HOLIDAY").agg(F.sum("ORDER_TOTAL"))

# Joining with weather data

In [122]:
connection_parameters = {
    "account": accountname,
    "user": username,
    "password": password,
    "role": "ACCOUNTADMIN",
    "database": "GLOBAL_WEATHER__CLIMATE_DATA_FOR_BI",

    "warehouse": "HOL_WH"
}

wea_session = Session.builder.configs(connection_parameters).create()
wdf = wea_session.table('STANDARD_TILE.HISTORY_DAY')

In [123]:
wdf=wea_session.sql("select * From STANDARD_TILE.HISTORY_DAY")

In [124]:
print(type(wdf))
#print(type(semi_final_df))

<class 'snowflake.snowpark.dataframe.DataFrame'>


In [125]:
grouped_df.show()

------------------------------------------------------------------------------------------------------------------------------------
|"DATE"      |"TRUCK_ID"  |"MONTH"  |"HOUR"  |"DOW"  |"DAY"  |"MENU_TYPE"  |"LOCATION_ID"  |"PUBLIC_HOLIDAY"  |"SUM(ORDER_TOTAL)"  |
------------------------------------------------------------------------------------------------------------------------------------
|2022-04-06  |67          |4        |18      |3      |6      |Vegetarian   |15170          |0                 |13632.0000          |
|2022-04-06  |67          |4        |19      |3      |6      |Vegetarian   |15170          |0                 |13860.0000          |
|2022-04-06  |69          |4        |11      |3      |6      |Ethiopian    |12753          |0                 |13694.0000          |
|2022-04-06  |70          |4        |9       |3      |6      |Hot Dogs     |8429           |0                 |11776.0000          |
|2022-04-06  |70          |4        |13      |3      |6      |Hot Dog

In [126]:
wdf_re=wdf.with_column_renamed(col("DATE_VALID_STD"), "DATE")

In [127]:
sdf_loc = session.table('RAW_POS.Location_New')

In [128]:
sdf_loc_dr=sdf_loc.drop("PLACEKEY","ISO_COUNTRY_CODE")

In [129]:
sdf_loc_dr.show()

---------------------------------------------------------------------------------------------------------------------------------------------------------------
|"LOCATION_ID"  |"LOCATION"                                        |"CITY"  |"REGION"  |"COUNTRY"      |"LAT"               |"LONG"               |"ZIPCODE"  |
---------------------------------------------------------------------------------------------------------------------------------------------------------------
|1030           |University Of Colorado Museum Of Natural History  |Denver  |CO        |United States  |40.00768674326263   |-105.26970066129032  |80802      |
|1031           |Denver Technological Center                       |Denver  |CO        |United States  |39.62735682905758   |-104.91269066527825  |80237      |
|1032           |Heritage Club At Denver Tech Center               |Denver  |CO        |United States  |39.62526657316754   |-104.91238594440485  |80237      |
|1033           |Porter Wound Care Cente

In [130]:
semi_final_df=grouped_df.join(sdf_loc_dr, grouped_df["LOCATION_ID"] == sdf_loc_dr["LOCATION_ID"])

In [131]:
semi_final_df.show()

-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|"DATE"      |"TRUCK_ID"  |"MONTH"  |"HOUR"  |"DOW"  |"DAY"  |"MENU_TYPE"  |"l_mqaw_LOCATION_ID"  |"PUBLIC_HOLIDAY"  |"SUM(ORDER_TOTAL)"  |"r_a923_LOCATION_ID"  |"LOCATION"                            |"CITY"     |"REGION"  |"COUNTRY"      |"LAT"               |"LONG"               |"ZIPCODE"  |
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|2019-01-01  |1           |1        |9       |2      |1      |Ice Cream    |2655                  |0            

In [132]:
wdf_re.show()

----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

In [133]:
semi_final_df=semi_final_df.with_column_renamed(col("ZIPCODE"), "POSTAL_CODE")

In [134]:
type(wdf_re["DATE"])

snowflake.snowpark.column.Column

In [135]:
final_df = semi_final_df.join(wdf_re, semi_final_df["DATE"] == wdf_re["DATE"])

In [136]:
weather=wdf_re.to_pandas()

In [137]:
#syntax=pd.io.sql.get_schema(weather, "weather_data")

In [138]:
final_df.show()

Failed to execute query [queryID: 01ac4dac-3200-bb82-0003-d87600059e1a]  SELECT  *  FROM ( SELECT  *  FROM (( SELECT "DATE" AS "l_de8w_DATE", "TRUCK_ID" AS "TRUCK_ID", "MONTH" AS "MONTH", "HOUR" AS "HOUR", "DOW" AS "DOW", "DAY" AS "DAY", "MENU_TYPE" AS "MENU_TYPE", "l_mqaw_LOCATION_ID" AS "l_mqaw_LOCATION_ID", "PUBLIC_HOLIDAY" AS "PUBLIC_HOLIDAY", "SUM(ORDER_TOTAL)" AS "SUM(ORDER_TOTAL)", "r_a923_LOCATION_ID" AS "r_a923_LOCATION_ID", "LOCATION" AS "LOCATION", "CITY" AS "CITY", "REGION" AS "REGION", "COUNTRY" AS "l_de8w_COUNTRY", "LAT" AS "LAT", "LONG" AS "LONG", "POSTAL_CODE" AS "l_de8w_POSTAL_CODE" FROM ( SELECT "DATE", "TRUCK_ID", "MONTH", "HOUR", "DOW", "DAY", "MENU_TYPE", "l_mqaw_LOCATION_ID", "PUBLIC_HOLIDAY", "SUM(ORDER_TOTAL)", "r_a923_LOCATION_ID", "LOCATION", "CITY", "REGION", "COUNTRY", "LAT", "LONG", "ZIPCODE" AS "POSTAL_CODE" FROM ( SELECT  *  FROM (( SELECT "DATE" AS "DATE", "TRUCK_ID" AS "TRUCK_ID", "MONTH" AS "MONTH", "HOUR" AS "HOUR", "DOW" AS "DOW", "DAY" AS "DAY", "ME

SnowparkSQLException: (1304): 002003 (02000): SQL compilation error:
Schema 'FROSTBYTE_TASTY_BYTES.STANDARD_TILE' does not exist or not authorized.