# Define Connections and Variables

In [None]:
import os
import requests
from requests.auth import HTTPBasicAuth

# Shopify store address and private-app credentials
shop = os.environ.get("SHOPIFY_B2B")
shop_url = 'https://{}.myshopify.com'.format(shop)
username = os.environ.get("SHOPIFY_B2B_USER")
# The app password is read from the "azure_key_vault" secret scope, never hard-coded
private_app_password = dbutils.secrets.get(scope="azure_key_vault", key="SHOPIFYB2B-PW")
# Basic-auth object used as the default credentials for the Shopify requests
basic = HTTPBasicAuth(username, private_app_password)

In [None]:
# Azure SQL Database connection settings
jdbcHostname = os.getenv("SQLDB_HOST")
jdbcDatabase = os.getenv("SQLDB_BB")
jdbcPort = 1433
user = os.getenv("SQLDB_USER")
# The database password is read from the "azure_key_vault" secret scope
password = dbutils.secrets.get(scope="azure_key_vault", key="SQLDB-PW")

# JDBC URL and driver properties shared by the Spark reads and writes
jdbcUrl = f"jdbc:sqlserver://{jdbcHostname}:{jdbcPort};database={jdbcDatabase}"
connectionProperties = dict(
    user=user,
    password=password,
    driver="com.microsoft.sqlserver.jdbc.SQLServerDriver",
)

In [None]:
from datetime import date, datetime, timedelta, timezone
import pytz

def is_daylight_saving(date, tz):
    """Report whether local midnight on ``date`` falls in daylight saving time.

    ``date`` is a ``'%Y-%m-%d'`` string and ``tz`` is a zone name accepted by
    ``pytz.timezone``. Returns True when the DST offset at 00:00 of that day
    in that zone is non-zero, False otherwise.
    """
    zone = pytz.timezone(tz)
    naive_midnight = datetime.strptime(date, '%Y-%m-%d')
    local_midnight = zone.localize(naive_midnight)
    no_shift = timedelta(0)
    return local_midnight.dst() != no_shift

# Zone name used for every DST look-up
tz = 'US/Pacific'
# One-day step, also used to derive "yesterday"
delta = timedelta(days=1)

# Yesterday on the Pacific wall clock, and the first/last day of its calendar year
yesterday = datetime.now(tz=pytz.utc).astimezone(pytz.timezone(tz)) - delta
start_dt = date(yesterday.year, 1, 1)
end_dt = date(yesterday.year, 12, 31)

# Every day of that year whose local midnight is in daylight saving time, in order
dates = [
    day
    for day in (
        start_dt + timedelta(days=offset)
        for offset in range((end_dt - start_dt).days + 1)
    )
    if is_daylight_saving(day.isoformat(), tz)
]
# Leave start_dt one day past end_dt, exactly where a day-by-day walk would stop
start_dt = end_dt + delta

In [None]:
from datetime import date, timedelta

# Run configuration
output_table_name = "shopify_response"
sales_channel_id = 5
api_version = "2023-10"
entities = ["orders", "locations", "products", "customers"]
date_types = ["created_at", "updated_at"]
limit = 250  # page size sent to the API (250 in Production)

# Query window: a single day, yesterday (override with e.g. '2023-11-08' for a backfill)
start_date = yesterday.strftime("%Y-%m-%d")
end_date = start_date

# UTC offset at 00:00:00 of start_date: '-07' only when that midnight is already in DST
us_min_tz = (
    '-07'
    if dates[0].isoformat() <= start_date <= dates[-1].isoformat()
    else '-08'
)

# UTC offset at 23:59:59 of end_date: the window is shifted one day earlier than
# the one above, because the end of the day before dates[0] is treated as DST
# while the end of dates[-1] is treated as standard time
us_max_tz = (
    '-07'
    if (dates[0] - delta).isoformat() <= end_date < dates[-1].isoformat()
    else '-08'
)

# Get Response from Shopify REST API

In [None]:
def get_requests(shop_url: str, api_version: str, entity: str, isCount: bool, date_type: str, start_date: str, end_date: str, us_min_tz: str, us_max_tz: str, limit: int=250, authentication=basic, timeout: float=60) -> list:
    """Fetch a date-filtered Shopify REST resource (or its count), page by page.

    The first request targets ``{entity}.json`` (or ``{entity}/count.json``
    when ``isCount`` is truthy) with ``status=any`` and a
    ``{date_type}_min`` / ``{date_type}_max`` window running from
    ``start_date`` 00:00:00 at offset ``us_min_tz`` to ``end_date`` 23:59:59
    at offset ``us_max_tz``. Further pages are fetched by following the
    ``next`` link of each response.

    Args:
        shop_url: Base URL of the store.
        api_version: Admin API version segment of the path.
        entity: Resource name, e.g. ``"orders"``.
        isCount: Truthy to query the count endpoint instead of the listing.
        date_type: Date field prefix for the filter, e.g. ``"created_at"``.
        start_date: First day of the window, ``YYYY-MM-DD``.
        end_date: Last day of the window, ``YYYY-MM-DD``.
        us_min_tz: Hour offset appended to the window start, e.g. ``"-08"``.
        us_max_tz: Hour offset appended to the window end, e.g. ``"-07"``.
        limit: Value of the ``limit`` query parameter.
        authentication: Passed as ``auth`` to every request.
        timeout: Passed as ``timeout`` to every ``requests.get`` call so a
            stalled connection raises instead of blocking the run forever.

    Returns:
        A list with one tuple per recorded page:
        ``(label, sales_channel_id, response text, request URL,
        next-page URL or '', HTTP status code)``, where ``label`` is
        ``entity`` or ``entity + '_count'`` and ``sales_channel_id`` is read
        from the module-level variable of that name.
    """
    resource = f'{entity}/count' if isCount else entity
    label = entity + '_count' if isCount else entity
    first_url = (
        f'{shop_url}/admin/api/{api_version}/{resource}.json?status=any'
        f'&{date_type}_min={start_date}T00%3A00%3A00{us_min_tz}%3A00'
        f'&{date_type}_max={end_date}T23%3A59%3A59{us_max_tz}%3A00'
        f'&limit={limit}'
    )

    # Create an empty list to hold the result temporarily
    data = []
    result = requests.get(first_url, auth=authentication, timeout=timeout)

    # A count response is always recorded. A listing page is recorded only
    # while its JSON body has a non-empty `entity` key, so a page without it
    # (for example an error payload) ends the walk unrecorded.
    while isCount or result.json().get(entity):
        next_link = result.links.get('next')
        next_url = str(next_link.get('url')) if next_link else ''
        data.append((label, sales_channel_id, result.text, result.url, next_url, result.status_code))

        if not next_link:
            break

        result = requests.get(next_link.get('url'), auth=authentication, timeout=timeout)
    return data

In [None]:
def get_requests_all(shop_url: str, api_version: str, entity: str, isCount: bool, limit: int=250, authentication=basic, timeout: float=60) -> list:
    """Fetch a whole Shopify REST resource (or its count), page by page.

    The first request targets ``{entity}.json`` (or ``{entity}/count.json``
    when ``isCount`` is truthy) with only a ``limit`` query parameter, i.e.
    no date filter. Further pages are fetched by following the ``next`` link
    of each response.

    Args:
        shop_url: Base URL of the store.
        api_version: Admin API version segment of the path.
        entity: Resource name, e.g. ``"products"``.
        isCount: Truthy to query the count endpoint instead of the listing.
        limit: Value of the ``limit`` query parameter.
        authentication: Passed as ``auth`` to every request.
        timeout: Passed as ``timeout`` to every ``requests.get`` call so a
            stalled connection raises instead of blocking the run forever.

    Returns:
        A list with one tuple per recorded page:
        ``(label, sales_channel_id, response text, request URL,
        next-page URL or '', HTTP status code)``, where ``label`` is
        ``entity`` or ``entity + '_count'`` and ``sales_channel_id`` is read
        from the module-level variable of that name.
    """
    resource = f'{entity}/count' if isCount else entity
    label = entity + '_count' if isCount else entity
    first_url = f'{shop_url}/admin/api/{api_version}/{resource}.json?limit={limit}'

    # Create an empty list to hold the result temporarily
    data = []
    result = requests.get(first_url, auth=authentication, timeout=timeout)

    # A count response is always recorded. A listing page is recorded only
    # while its JSON body has a non-empty `entity` key, so a page without it
    # (for example an error payload) ends the walk unrecorded.
    while isCount or result.json().get(entity):
        next_link = result.links.get('next')
        next_url = str(next_link.get('url')) if next_link else ''
        data.append((label, sales_channel_id, result.text, result.url, next_url, result.status_code))

        if not next_link:
            break

        result = requests.get(next_link.get('url'), auth=authentication, timeout=timeout)
    return data

In [None]:
# Accumulates one tuple per fetched page, across all entities
data = []

for entity in entities:
    if entity in ("locations", "products"):
        # These entities are pulled in full: the count request first, then the listing
        for count_only in (True, False):
            data.extend(get_requests_all(shop_url, api_version, entity, count_only, limit))
        continue

    # The remaining entities are filtered by the date window, once per date field;
    # again the count request comes before the listing
    for date_type in date_types:
        for count_only in (True, False):
            data.extend(
                get_requests(shop_url, api_version, entity, count_only, date_type, start_date, end_date, us_min_tz, us_max_tz, limit)
            )

# Create a DataFrame

In [None]:
import pyspark.sql.functions as F
import pyspark.sql.types as T

# Every column is a non-nullable string; the field order matches the tuples held in `data`
schema = T.StructType(
    [
        T.StructField(column_name, T.StringType(), False)
        for column_name in (
            "shopify_entity",
            "sales_channel_id",
            "json_response",
            "url",
            "next_url",
            "http_status",
        )
    ]
)

# Turn the collected response tuples into a Spark DataFrame
df = spark.createDataFrame(data, schema=schema)

# Save the response to Azure SQL DB

In [None]:
# Columns selected for the write to the output table, in this order
output_cols = [
    "shopify_entity", "sales_channel_id", "json_response",
    "url", "next_url", "http_status",
]

In [None]:
# Current time shifted from UTC to the America/Los_Angeles zone
current_timestamp_pt = F.from_utc_timestamp(F.current_timestamp(), "America/Los_Angeles")

# Stamp each row with that time. The column stays on `df` for later inspection
# but is not listed in output_cols, so it is not part of the write below.
df = df.withColumn("RecordCreatedDate", current_timestamp_pt)

# Append the selected columns to the output table in Azure SQL Database
df.select(*output_cols).write.jdbc(
    jdbcUrl,
    output_table_name,
    mode="append",
    properties=connectionProperties,
)

In [None]:
# Show the queried window and the recorded order-count responses
print(start_date, end_date)
df.where(df["shopify_entity"] == "orders_count").show(truncate=False, vertical=True)

In [None]:
# Show the recorded customer-count responses
df.where(df["shopify_entity"] == "customers_count").show(truncate=False, vertical=True)

# Execute stored procedure in DB to load JSON

In [None]:
# Open a plain JDBC connection through the Spark JVM gateway so a stored
# procedure can be executed (the Spark JDBC reader/writer only moves tables).
driver_manager = spark._sc._gateway.jvm.java.sql.DriverManager
connection = driver_manager.getConnection(jdbcUrl, user, password)

try:
    # Keep executing shopify_load_json until the shopify_response table is empty.
    # NOTE(review): this never terminates if the procedure leaves rows behind;
    # consider capping the number of attempts.
    while True:
        # Re-read the table on every pass so the row count reflects the last run
        df_json = spark.read.jdbc(
            url=jdbcUrl,
            table=output_table_name,
            properties=connectionProperties,
        )
        if df_json.count() == 0:
            break

        statement = connection.prepareCall("EXEC shopify_load_json;")
        try:
            statement.execute()
        finally:
            # Release the statement on every pass instead of leaking one per run
            statement.close()
finally:
    # Close the connection even when the read or the procedure call fails
    connection.close()