In [None]:
from pyspark.sql import SparkSession
from delta import configure_spark_with_delta_pip
from pyspark.sql.functions import col, to_timestamp, when, trim

# Spark session: register the Delta Lake SQL extension and catalog on the builder.
builder = (
    SparkSession.builder
    .appName("SilverCleaning")
    .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
    .config(
        "spark.sql.catalog.spark_catalog",
        "org.apache.spark.sql.delta.catalog.DeltaCatalog",
    )
)

# Pass the builder through the Delta helper, then obtain (or reuse) the session.
spark = configure_spark_with_delta_pip(builder).getOrCreate()

# Read the Bronze-layer data (stored in Delta format).
df_bronze = spark.read.load("../delta/bronze/online_retail", format="delta")

# 📌 Data cleaning steps
# Types are normalised first and the null check runs afterwards. This way a
# value that is turned into NULL by the conversion itself (e.g. an InvoiceDate
# that to_timestamp cannot parse, when ANSI mode is off) is dropped together
# with the rows that were already NULL in Bronze, instead of leaking into
# the Silver table.
df_silver = (
    df_bronze
    .withColumn("InvoiceDate", to_timestamp(col("InvoiceDate")))
    .withColumn("Quantity", col("Quantity").cast("int"))
    .withColumn("UnitPrice", col("UnitPrice").cast("double"))
    .withColumn("CustomerID", col("CustomerID").cast("string"))
    # Strip leading/trailing whitespace from the product description.
    .withColumn("Description", trim(col("Description")))
    # Every Silver row must carry all of these identifying fields.
    .dropna(subset=["InvoiceNo", "StockCode", "Description", "InvoiceDate", "CustomerID"])
    # Keep only strictly positive quantities and prices; rows where either
    # value is NULL (including failed casts) do not satisfy the comparison
    # and are removed as well.
    .filter(col("Quantity") > 0)
    .filter(col("UnitPrice") > 0)
)

# Write to the Silver layer as a Delta table, overwriting any existing data.
(
    df_silver.write
    .format("delta")
    .mode("overwrite")
    .save("../delta/silver/online_retail_cleaned")
)

# Print the resulting schema and up to five rows as a quick visual check.
df_silver.printSchema()
df_silver.show(5)
print("✅ Silver cleaning işlemi tamamlandı.")