# Explanation of this notebook

This notebook builds the Silver customers table step by step. It runs the shared configuration notebook, reads the Bronze customers and Silver orders tables, aggregates each customer's order history (first and last purchase, number of orders, and product quantities), joins those metrics to the customer records, and writes the result as a Delta table. Code and text cells are presented together so that each step can be understood and executed in order.

In [0]:
%run ../Transversal/config

In [0]:
from datetime import datetime
import pytz

# Load timestamp: the current time in the America/Bogota zone, rendered as a
# day/month/year hour:minute:second string.
horario_colombia = pytz.timezone("America/Bogota")
current_date = f"{datetime.now(tz=horario_colombia):%d/%m/%Y %H:%M:%S}"

# Input tables. Bronze_Customers and Silver_Orders are not defined in this cell;
# presumably they come from the config notebook run earlier — verify.
customers = spark.table(Bronze_Customers)
orders = spark.table(Silver_Orders)

StatementMeta(, cbc8c9cb-5296-4c52-9648-b1ba4a95e1b8, 13, Finished, Available, Finished)

In [0]:
from pyspark.sql.functions import to_timestamp, col, min, max, count, sum as _sum, lit, datediff


def generate_silver_customers(orders_df, employees_df, table_silver, current_date):
    """Aggregate orders per customer, join the metrics to customer records, and save them.

    Args:
        orders_df: Spark DataFrame of orders. The columns read here are
            ``order_id``, ``customer_id``, ``event_date`` and ``quantity_products``.
        employees_df: Spark DataFrame joined to the metrics on ``customer_id``.
            Despite the parameter name, the call in this notebook passes the
            customers table.
        table_silver: Name of the table that is overwritten with the result.
        current_date: Load timestamp as a string in ``dd/MM/yyyy HH:mm:ss`` format.

    Returns:
        The joined DataFrame (customer columns plus order metrics) that was
        written to ``table_silver``.
    """
    timestamp_pattern = "dd/MM/yyyy HH:mm:ss"

    # One row per order_id, with the event date parsed into a timestamp.
    # Presumably event_date is a string in the pattern above — TODO confirm.
    unique_orders = (
        orders_df
        .dropDuplicates(["order_id"])
        .withColumn("approved_date", to_timestamp("event_date", timestamp_pattern))
    )

    # Per-customer purchase window, order count and product-quantity statistics.
    # NOTE: min/max here are the pyspark.sql.functions aggregates, not the builtins.
    metric_columns = [
        min("approved_date").alias("first_purchase"),
        max("approved_date").alias("last_purchase"),
        count("order_id").alias("total_orders"),
        _sum("quantity_products").alias("total_products"),
        min("quantity_products").alias("min_quantity_sold"),
        max("quantity_products").alias("max_quantity_sold"),
    ]
    customer_metrics = unique_orders.groupBy("customer_id").agg(*metric_columns)

    # Stamp the load time, then measure tenure in days from the first purchase to it.
    customer_metrics = (
        customer_metrics
        .withColumn("load_date", to_timestamp(lit(current_date), timestamp_pattern))
        .withColumn("days_as_customer", datediff("load_date", "first_purchase"))
    )

    # Inner join: only customers that have at least one aggregated order are kept.
    df_merged = employees_df.join(customer_metrics, on="customer_id", how="inner")

    # Replace the target Delta table, allowing its schema to change.
    (
        df_merged.write
        .format("delta")
        .mode("overwrite")
        .option("overwriteSchema", "true")
        .saveAsTable(table_silver)
    )

    return df_merged

StatementMeta(, cbc8c9cb-5296-4c52-9648-b1ba4a95e1b8, 14, Finished, Available, Finished)

In [0]:
# Build and persist the Silver customers table.
# Note: the function's `employees_df` parameter receives the customers table here.
df = generate_silver_customers(
    table_silver=Silver_Customers,
    current_date=current_date,
    employees_df=customers,
    orders_df=orders,
)

StatementMeta(, cbc8c9cb-5296-4c52-9648-b1ba4a95e1b8, 15, Finished, Available, Finished)