Copyright (c) Microsoft Corporation. 
Licensed under the MIT license. 
The Synapse-AI-Retail-Recommender includes instructions for uploading the Product Detail JSON to the ADLS Gen2 account that is attached to your Synapse Studio.
  
This script reads the Product Details JSON and creates a product table that is indexed and made queryable by the API layer, which uses it to add extra details about the products, such as images and cleaned-up names.

In [None]:
from pyspark.sql.functions import *

# Name of the Data Lake Storage account. Must be filled in before running.
DATA_LAKE_ACCOUNT_NAME = ""
# Name of the filesystem that was chosen to be the primary filesystem.
# Must be filled in before running.
FILE_SYSTEM_NAME = ""

# Fail fast with an actionable message: if either value is left blank, the
# abfss:// URI built below is malformed and the read fails with a far less
# helpful storage error.
if not DATA_LAKE_ACCOUNT_NAME or not FILE_SYSTEM_NAME:
    raise ValueError(
        "Set DATA_LAKE_ACCOUNT_NAME and FILE_SYSTEM_NAME before running this notebook."
    )

# NOTE: despite its name, data_path holds the DataFrame loaded from the product
# detail JSON file, not a path string. The name is kept so that any other cell
# referring to it keeps working.
data_path = spark.read.load(f"abfss://{FILE_SYSTEM_NAME}@{DATA_LAKE_ACCOUNT_NAME}.dfs.core.windows.net/synapse/workspaces/product_detail.json", format='json', multiline=True)

# Build the product table:
#   1. explode the top-level "products" array into one row per product,
#   2. flatten each product struct into columns and stamp the row with the
#      current timestamp as "updated_ts",
#   3. keep only the columns that are written downstream, in a fixed order.
# current_timestamp() already yields a timestamp column, so it needs neither
# lit() nor a cast to "timestamp".
df = (
    data_path.select(explode(col("products")).alias("products"))
    .select("products.*", current_timestamp().alias("updated_ts"))
    .select("id", "productID", "productCategory", "brand", "name", "price", "imageURL", "description", "updated_ts")
)

# Create the retailaidb database only when it is missing. IF NOT EXISTS makes
# the statement a no-op when the database is already there, so no exception
# handling is needed for that case. Genuine failures (permissions, metastore
# problems, ...) now surface instead of being swallowed by a bare `except:` and
# misreported as "Database already exists".
spark.sql("CREATE DATABASE IF NOT EXISTS retailaidb")


# Settings for the "cosmos.oltp" writer: the Synapse linked service to connect
# through, the target container, and upsert-enabled writes.
cosmos_write_options = {
    "spark.synapse.linkedService": "retail_ai_cosmos_synapse_link",
    "spark.cosmos.container": "product_details",
    "spark.cosmos.write.upsertEnabled": "true",
}

# Write the product DataFrame to the product_details container.
(
    df.write.format("cosmos.oltp")
    .options(**cosmos_write_options)
    .mode("overwrite")
    .save()
)