In [2]:
from pyspark.sql import SparkSession
from delta import configure_spark_with_delta_pip
from pyspark.sql.functions import sum, count, to_date, col

# Build the Spark session for the gold-layer job, registering the Delta Lake
# SQL extension and the Delta catalog so Delta tables can be read and written.
builder = (
    SparkSession.builder
    .appName("GoldAggregation")
    .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
    .config(
        "spark.sql.catalog.spark_catalog",
        "org.apache.spark.sql.delta.catalog.DeltaCatalog",
    )
)

# configure_spark_with_delta_pip receives the builder before the session is
# created; getOrCreate() then returns the session used by the rest of the cell.
spark = configure_spark_with_delta_pip(builder).getOrCreate()

# Read the cleaned silver-layer table (stored in Delta format).
df_silver = spark.read.load(
    "../delta/silver/online_retail_cleaned",
    format="delta",
)

# GOLD #1: daily sales totals.
# NOTE: `sum` here is pyspark.sql.functions.sum (imported at the top of the
# cell), not the Python builtin.
line_revenue = df_silver.Quantity * df_silver.UnitPrice

daily_sales = (
    df_silver
    # Derive the calendar date from InvoiceDate so rows group per day.
    .withColumn("SaleDate", to_date("InvoiceDate"))
    .groupBy("SaleDate")
    .agg(
        sum("Quantity").alias("TotalQuantity"),
        sum(line_revenue).alias("TotalRevenue"),
    )
    .orderBy("SaleDate")
)

# GOLD #2: best-selling products (top 10 by total quantity sold).
# NOTE: `sum` here is pyspark.sql.functions.sum (imported at the top of the
# cell), not the Python builtin.
top_products = (
    df_silver
    .groupBy("Description")
    .agg(sum("Quantity").alias("TotalSold"))
    # Break ties on Description so the top-10 cut is deterministic. This
    # DataFrame is evaluated more than once in this cell (once for the Delta
    # write, once for show()), and without a total ordering products tied at
    # the cutoff could differ between the saved table and the displayed rows.
    .orderBy(col("TotalSold").desc(), col("Description").asc())
    .limit(10)
)

# GOLD #3: revenue per country, highest first.
# NOTE: `sum` here is pyspark.sql.functions.sum (imported at the top of the
# cell), not the Python builtin.
country_revenue = sum(df_silver.Quantity * df_silver.UnitPrice)

country_sales = (
    df_silver
    .groupBy("Country")
    .agg(country_revenue.alias("CountryRevenue"))
    .orderBy(col("CountryRevenue").desc())
)

# Persist each gold table as a Delta table, replacing any previous contents.
daily_sales.write.save(
    "../delta/gold/daily_sales", format="delta", mode="overwrite"
)
top_products.write.save(
    "../delta/gold/top_products", format="delta", mode="overwrite"
)
country_sales.write.save(
    "../delta/gold/country_sales", format="delta", mode="overwrite"
)

# Print the first five rows of each gold DataFrame as a quick sanity check.
for _preview in (daily_sales, top_products, country_sales):
    _preview.show(5)

print("✅ Gold aggregation işlemi tamamlandı.")

25/07/28 14:35:31 WARN package: Truncated the string representation of a plan since it was too large. This behavior can be adjusted by setting 'spark.sql.debug.maxToStringFields'.


                                                                                

+----------+-------------+------------------+
|  SaleDate|TotalQuantity|      TotalRevenue|
+----------+-------------+------------------+
|2010-12-01|        24215| 46376.49000000003|
|2010-12-02|        31142| 47316.52999999987|
|2010-12-03|        11839|23921.710000000097|
|2010-12-05|        16449|31771.600000000166|
|2010-12-06|        16291| 31215.64000000016|
+----------+-------------+------------------+
only showing top 5 rows

+--------------------+---------+
|         Description|TotalSold|
+--------------------+---------+
|PAPER CRAFT , LIT...|    80995|
|MEDIUM CERAMIC TO...|    77916|
|WORLD WAR 2 GLIDE...|    54415|
|JUMBO BAG RED RET...|    46181|
|WHITE HANGING HEA...|    36725|
+--------------------+---------+
only showing top 5 rows

+--------------+------------------+
|       Country|    CountryRevenue|
+--------------+------------------+
|United Kingdom|  7308391.55399994|
|   Netherlands|285446.33999999997|
|          EIRE| 265545.8999999999|
|       Germany|228867.