In [0]:
# Import necessary libraries
import pandas as pd

# Names of the catalog, schema, and table this script writes to.
catalog_name = "retail_cpg_catalog"
schema_name = "retail_cpg_schema"
table_name = "online_retail"

# Create the catalog and schema when missing and make them the session's
# current namespace. The statements run strictly in this order: the catalog
# is selected before the schema is created, so the unqualified schema name
# is issued while that catalog is current.
for ddl_statement in (
    f"CREATE CATALOG IF NOT EXISTS {catalog_name}",
    f"USE CATALOG {catalog_name}",
    f"CREATE SCHEMA IF NOT EXISTS {schema_name}",
    f"USE {schema_name}",
):
    spark.sql(ddl_statement)

# Load public retail dataset (Online Retail from UCI) straight from its URL
# into a pandas DataFrame.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00352/Online%20Retail.xlsx"
df_pandas = pd.read_excel(url)

# Spreadsheet columns that mix numbers and text come back from pandas as
# `object` columns holding several Python types at once (plus float NaN for
# empty cells). Spark cannot reliably infer one column type from such data
# and can fail during createDataFrame, so normalise every object column to
# plain strings and represent missing cells as None (Spark NULL).
for column in df_pandas.select_dtypes(include="object").columns:
    df_pandas[column] = df_pandas[column].map(
        lambda value: None if pd.isna(value) else str(value)
    )

# Convert Pandas DataFrame to Spark DataFrame
df_spark = spark.createDataFrame(df_pandas)

# Save as a Delta table under the fully qualified name, replacing any
# existing contents of that table.
(
    df_spark.write.format("delta")
    .mode("overwrite")
    .saveAsTable(f"{catalog_name}.{schema_name}.{table_name}")
)

# Sanity check: read back ten rows from the freshly written table and print them.
preview_query = f"SELECT * FROM {catalog_name}.{schema_name}.{table_name} LIMIT 10"
preview_rows = spark.sql(preview_query)
preview_rows.show()
