### Borrar y crear widgets

In [0]:
# Start from a clean widget state, then declare the single "catalog" text
# widget with its default value.
# NOTE(review): Databricks documents that a widget should not be created in the
# same cell that removes widgets — confirm this behaves as expected, or move
# removeAll() into its own cell.
dbutils.widgets.removeAll()
dbutils.widgets.text("catalog", "bakehouse_dev")

# Development flag: when True, later cells print counts/schemas and display
# sample rows.
showPrint = False

# Catalog name read from the widget; used to qualify the table names below.
catalogName = dbutils.widgets.get("catalog")

### Insertar Gold - Customers

In [0]:
from pyspark.sql import Window
from pyspark.sql import functions as F

# Load the silver customers table from the catalog chosen via the widget.
silver_table = f"{catalogName}.silver.customers_silver"
df_silver = spark.table(silver_table)

# Development-only diagnostics: table dimensions plus a 10-row preview.
if showPrint:
    row_count = df_silver.count()
    column_count = len(df_silver.columns)
    print(f"Row count: {row_count}")
    print(f"Column count: {column_count}")
    display(df_silver.limit(10))

In [0]:
# Count customers per country and per (country, state), combined in a single
# DataFrame with one row per (country, state) pair.
#
# The per-country total is derived with a window over the per-state counts
# rather than by joining two aggregates on "country": an equi-join never
# matches NULL keys, so customers with a NULL country would be split across two
# half-empty rows (one without the state count, one without the country count).
# Window partitioning keeps NULL countries together, the same way groupBy does.
df_country_state = df_silver.groupBy("country", "state").agg(
    F.count("customerID").alias("customer_count_country_state")
)

country_window = Window.partitionBy("country")

df_result = (
    df_country_state
    # Summing the per-state counts inside a country equals counting that
    # country's customers directly.
    .withColumn(
        "customer_count_country",
        F.sum("customer_count_country_state").over(country_window),
    )
    # Keep the same column order the join-based version produced.
    .select("country", "customer_count_country", "state", "customer_count_country_state")
    # Metadata column: timestamp of this load.
    .withColumn("updatedate", F.current_timestamp())
)

if showPrint:
    display(df_result.orderBy("country", "state").limit(10))

In [0]:
# Project the result onto the gold column layout, in the desired order.
gold_columns = [
    "country",
    "customer_count_country",
    "state",
    "customer_count_country_state",
    "updatedate",
]
df_gold = df_result.select(*gold_columns)

# Development-only: show the resulting schema.
if showPrint:
    df_gold.printSchema()

In [0]:
# Persist the gold DataFrame as a Delta table, replacing its current contents
# on every run (overwrite mode, with the mergeSchema option enabled).
gold_table = f"{catalogName}.gold.customers_gold"

(
    df_gold.write
    .format("delta")
    .mode("overwrite")
    .options(mergeSchema="true")
    .saveAsTable(gold_table)
)