# Imports

In [0]:
from pyspark.sql.functions import *
from pyspark.sql.types import *

# Widgets

In [0]:
# Remove every widget currently defined on the notebook before the
# widgets are (re)declared further down.
dbutils.widgets.removeAll()

In [0]:
# Text widgets exposed by this notebook, as (widget name, default value)
# pairs. They are created in the order listed.
widget_defaults = (
    ("storageName", "adlsprojectsmartdata"),
    ("container", "raw"),
    ("catalog", "project_smartdata"),
    ("schema", "bronze"),
)
for widget_name, default_value in widget_defaults:
    dbutils.widgets.text(widget_name, default_value)


# Constants

In [0]:
# Read the current widget values once so the rest of the notebook can use
# plain variables instead of repeated widget lookups.
read_widget = dbutils.widgets.get
storage_name, container, catalog, schema = (
    read_widget(widget_key)
    for widget_key in ("storageName", "container", "catalog", "schema")
)

# Paths

In [0]:
# Root URI (abfss scheme) of the source container; each input file sits
# directly under it.
path_base = f"abfss://{container}@{storage_name}.dfs.core.windows.net/"
path_category = path_base + "category.csv"
path_products = path_base + "products.csv"
path_stores = path_base + "stores.csv"


# Structures

In [0]:
# Column layout of category.csv: two string columns, both declared
# non-nullable.
category_schema = StructType([
    StructField(column_name, StringType(), False)
    for column_name in ("categoryId", "categoryName")
])

In [0]:
# Column layout of products.csv.
# price uses an explicit DecimalType(10, 2): a bare DecimalType() means
# precision 10 / scale 0, which drops the fractional part of every price
# when the file is read.
# NOTE(review): scale 2 assumes prices carry at most two decimal places -
# confirm against the source file. An existing bronze table created with the
# previous decimal(10,0) column may have to be replaced before the overwrite
# in the save step succeeds.
products_schema = StructType(fields=[
    StructField("productId", StringType(), False),
    StructField("productName", StringType(), False),
    StructField("categoryId", StringType(), False),
    StructField("launchDate", DateType(), False),
    StructField("price", DecimalType(10, 2), False)
])

In [0]:
# Column layout of stores.csv: every column is a non-nullable string.
stores_schema = StructType([
    StructField(column_name, StringType(), False)
    for column_name in ("storeId", "storeName", "city", "country")
])

# Read source

In [0]:
# Load products.csv with its header row, applying the explicit products
# schema instead of inferring column types.
df_products = spark.read.csv(
    path_products,
    schema=products_schema,
    header=True,
)

In [0]:
# Load category.csv with its header row, applying the explicit category
# schema instead of inferring column types.
df_category = spark.read.csv(
    path_category,
    schema=category_schema,
    header=True,
)

In [0]:
# Load stores.csv with the declared stores_schema, the same way the products
# and category files are read. This replaces inferSchema, which makes the
# resulting column types depend on the file's contents and needs an extra
# pass over the data.
# NOTE(review): with an explicit schema the column names come from
# stores_schema rather than from the CSV header - confirm the header matches
# (storeId, storeName, city, country). If inference previously produced
# non-string types, the bronze table's schema will change.
df_stores = (
    spark.read
    .option("header", True)
    .schema(stores_schema)
    .csv(path_stores)
)

In [0]:
# Projection of the products frame without the launchDate column.
# NOTE(review): no later statement visible in this notebook section reads
# df_products_selected (the save step writes df_products) - confirm whether
# this projection is still needed or was meant to be the frame that is saved.
df_products_selected = df_products.select(
    "productId",
    "productName",
    "categoryId",
    "price",
)

# Save

In [0]:
# Write the category frame to <catalog>.<schema>.category, overwriting any
# existing table of that name.
category_table = f"{catalog}.{schema}.category"
df_category.write.saveAsTable(category_table, mode="overwrite")

In [0]:
# Write the full products frame to <catalog>.<schema>.products, overwriting
# any existing table of that name.
products_table = f"{catalog}.{schema}.products"
df_products.write.saveAsTable(products_table, mode="overwrite")

In [0]:
# Write the stores frame to <catalog>.<schema>.stores, overwriting any
# existing table of that name.
stores_table = f"{catalog}.{schema}.stores"
df_stores.write.saveAsTable(stores_table, mode="overwrite")