In [0]:
from pyspark.sql.functions import col, rand, monotonically_increasing_id, date_sub, current_date, lit

# Read both source tables from the catalog. These two DataFrames are the
# inputs that the order-generation cell combines.
retailers_df = spark.table("mma_fe_innovation.mma.medical_device_retailers")
devices_df = spark.table("mma_fe_innovation.mma.usfda_medical_devices")

# Optional sanity check: announce each table and preview its first five rows.
for _label, _preview_df in (
    ("Retailers table loaded:", retailers_df),
    ("Devices table loaded:", devices_df),
):
    print(_label)
    display(_preview_df.limit(5))

In [0]:
# Tunable parameters for the synthetic order data.
MAX_QUANTITY = 25          # quantities are drawn from 1..MAX_QUANTITY inclusive
ORDER_WINDOW_DAYS = 730    # order dates fall within the last ~two years
QUANTITY_SEED = 42         # separate seeds so quantity and date are not correlated
ORDER_DATE_SEED = 43

# 1. Cross join retailers with devices: every (retailer, device) pair becomes
# one candidate order, so the row count is the product of both table sizes.
potential_orders_df = retailers_df.crossJoin(devices_df)

# 2. Attach synthetic order details to each pair.
# The DataFrame is lazy, so it is re-evaluated by every action (the display()
# below and any later write). Seeding rand() keeps those evaluations aligned
# as long as the input data and partitioning are unchanged -- NOTE(review):
# Spark does not guarantee identical values if the physical plan differs.
orders_df = (
    potential_orders_df
    # Unique (and increasing) per row, but NOT consecutive.
    .withColumn("order_id", monotonically_increasing_id())
    # rand() is in [0, 1): truncate rand * MAX_QUANTITY to 0..MAX_QUANTITY-1,
    # then shift by one so that both 1 and MAX_QUANTITY are reachable.
    .withColumn(
        "quantity",
        (rand(seed=QUANTITY_SEED) * MAX_QUANTITY).cast("int") + 1,
    )
    # Subtract a random offset of 0..ORDER_WINDOW_DAYS-1 days from today.
    .withColumn(
        "order_date",
        date_sub(
            current_date(),
            (rand(seed=ORDER_DATE_SEED) * ORDER_WINDOW_DAYS).cast("int"),
        ),
    )
)

# 3. Select and arrange the columns for the final table.
# IMPORTANT: 'FirmName' and 'proprietary_name' must exist in the joined source
# tables; adjust them if your schemas differ.
# NOTE(review): presumably 'FirmName' comes from the retailers table and
# 'proprietary_name' from the devices table -- confirm against the schemas.
final_orders_df = orders_df.select(
    col("order_id"),
    col("order_date"),
    col("FirmName").alias("retailer_name"),        # retailer's name
    col("proprietary_name").alias("device_name"),  # device's proprietary name
    col("quantity"),
)

# Display the final generated orders table
print("Generated Orders Table:")
display(final_orders_df)

In [0]:
# Destination table for the generated orders.
output_table_name = "mma_fe_innovation.mma.medical_orders_bronze"

# Persist the orders as a managed table. "overwrite" mode replaces whatever
# data the table already holds, so re-running this cell does not append.
orders_writer = final_orders_df.write.mode("overwrite")
orders_writer.saveAsTable(output_table_name)

print(f"✅ Successfully created and saved the table '{output_table_name}'.")