In [0]:
import json
from datetime import datetime, timezone
from time import sleep

import requests
from pyspark.sql import SparkSession

In [0]:
# Base API endpoint
BASE_URL = "https://azure-api.companyname.io/shopify/products"

# Output base path in DBFS
BASE_PATH = "dbfs:/mnt/shopify/"

# Today's UTC date (YYYY-MM-DD), embedded in every output file name.
# datetime.utcnow() is deprecated since Python 3.12; asking for an aware
# "now" in UTC produces the same date string without the deprecation warning.
updated_date = datetime.now(timezone.utc).strftime("%Y-%m-%d")


# Function to save JSON to DBFS
def save_to_dbfs(data, page_num):
    """Write one page of data as a JSON file under ``BASE_PATH``.

    The file is named ``shopify__<updated_date>__<page_num>.json`` and any
    existing file at that path is overwritten. A confirmation line is printed
    once the write call returns.

    Args:
        data: JSON-serializable object to store.
        page_num: Page number used as the last component of the file name.
    """
    destination = f"{BASE_PATH}shopify__{updated_date}__{page_num}.json"
    dbutils.fs.put(destination, json.dumps(data), overwrite=True)
    print(f"Saved page {page_num} to {destination}")


# Load payloads to blob storage with page iteration
def fetch_and_store_all_pages(request_timeout=30, page_delay=5):
    """Download every page of the products endpoint and save each one to DBFS.

    The first request goes to ``BASE_URL`` with no query string. Each response
    body is parsed as JSON; its ``has_more`` flag decides whether another page
    is requested, and its ``since_id`` value is sent back (together with the
    page number) on that next request. Every successfully fetched page is
    handed to ``save_to_dbfs`` with its 1-based page number.

    The loop stops without printing the success message when a request returns
    a non-200 status, or when a response claims more data but carries no
    ``since_id`` (which would otherwise re-request the first page forever).

    Args:
        request_timeout: Seconds to wait on each HTTP request before
            ``requests`` raises a timeout error. Defaults to 30.
        page_delay: Seconds to pause between consecutive page requests.
            Defaults to 5.

    Returns:
        None. Progress and the final outcome are reported with ``print``.
    """
    page = 1
    since_id = None

    while True:
        # Only follow-up requests carry the cursor; let requests build and
        # URL-encode the query string rather than formatting it by hand.
        params = {"page": page, "since_id": since_id} if since_id else None
        response = requests.get(BASE_URL, params=params, timeout=request_timeout)

        if response.status_code != 200:
            print(f"Failed to fetch page {page}. Status: {response.status_code}")
            print(f"Stopped early after saving {page - 1} page(s).")
            return

        data = response.json()
        save_to_dbfs(data, page)

        # Check whether another page exists before doing any more work.
        if not data.get("has_more", False):
            break

        since_id = data.get("since_id")
        if not since_id:
            # Without a cursor the next request would hit the bare BASE_URL
            # again and keep re-downloading the first page.
            print(f"Page {page} reported more data but no since_id; stopping.")
            return

        page += 1
        # Pause only between requests, not after the final page.
        sleep(page_delay)

    print("All pages fetched and saved.")

In [0]:
containerName = "shopify"
storageAccountName = "blobexternals"
# SECURITY NOTE(review): this SAS token is embedded in the notebook source, so
# anyone who can read the notebook can read the container. Consider loading it
# from a Databricks secret scope (dbutils.secrets.get) and rotating this token.
sas = "?sv=2023-01-03&st=2025-06-09T00%3A45............Ea%2FU1s%3D"
config = f"fs.azure.sas.{containerName}.{storageAccountName}.blob.core.windows.net"
mountPoint = "/mnt/shopify"

# Drop any existing mount first so the mount below picks up the current SAS
# token. Unmounting is best-effort: it is expected to fail when nothing is
# mounted yet, so the error is reported and the cell carries on.
try:
    dbutils.fs.unmount(mountPoint)
except Exception as unmount_error:
    print(f"Nothing unmounted at {mountPoint}: {unmount_error}")

try:
    dbutils.fs.mount(
        source=f"wasbs://{containerName}@{storageAccountName}.blob.core.windows.net",
        mount_point=mountPoint,
        extra_configs={config: sas},
    )
except Exception as mount_error:
    # A failed mount is tolerable only if an earlier mount is still in place;
    # otherwise later writes under /mnt/shopify would not reach blob storage,
    # so the error is surfaced instead of being silently swallowed.
    if not any(m.mountPoint == mountPoint for m in dbutils.fs.mounts()):
        raise
    print(f"Mount failed but {mountPoint} is already mounted: {mount_error}")

/mnt/shopify has been unmounted.


In [0]:
# Fetch all products: page through the API and save each page to DBFS
fetch_and_store_all_pages()

Wrote 393042 bytes.
Saved page 1 to dbfs:/mnt/shopify/shopify__2025-06-27__1.json
Wrote 397452 bytes.
Saved page 2 to dbfs:/mnt/shopify/shopify__2025-06-27__2.json
Wrote 385427 bytes.
Saved page 3 to dbfs:/mnt/shopify/shopify__2025-06-27__3.json
Wrote 397800 bytes.
Saved page 4 to dbfs:/mnt/shopify/shopify__2025-06-27__4.json
