In [1]:
from pyspark.sql import SparkSession
from delta import configure_spark_with_delta_pip
from pyspark.sql.functions import col, to_timestamp, when, trim

# Spark session
builder = SparkSession.builder \
    .appName("SilverCleaning") \
    .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") \
    .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog")

spark = configure_spark_with_delta_pip(builder).getOrCreate()

# Bronze veriyi oku
df_bronze = spark.read.format("delta").load("../delta/bronze/online_retail")

# 📌 Veri temizleme adımları
df_silver = df_bronze \
    .dropna(subset=["InvoiceNo", "StockCode", "Description", "InvoiceDate", "CustomerID"]) \
    .withColumn("InvoiceDate", to_timestamp(col("InvoiceDate"))) \
    .withColumn("Quantity", col("Quantity").cast("int")) \
    .withColumn("UnitPrice", col("UnitPrice").cast("double")) \
    .withColumn("CustomerID", col("CustomerID").cast("string")) \
    .withColumn("Description", trim(col("Description"))) \
    .filter(col("Quantity") > 0) \
    .filter(col("UnitPrice") > 0)

# Silver katmanına yaz
df_silver.write.format("delta").mode("overwrite").save("../delta/silver/online_retail_cleaned")
df_silver.printSchema()
df_silver.show(5)
print("✅ Silver cleaning işlemi tamamlandı.")

:: loading settings :: url = jar:file:/opt/miniconda3/envs/spark-delta-env/lib/python3.10/site-packages/pyspark/jars/ivy-2.5.1.jar!/org/apache/ivy/core/settings/ivysettings.xml
25/07/28 14:31:14 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable


Ivy Default Cache set to: /Users/alialtunoglu/.ivy2/cache
The jars for the packages stored in: /Users/alialtunoglu/.ivy2/jars
io.delta#delta-core_2.12 added as a dependency
:: resolving dependencies :: org.apache.spark#spark-submit-parent-7ed0061e-b9ae-48ab-ac4e-45afc46a7a39;1.0
	confs: [default]
	found io.delta#delta-core_2.12;2.3.0 in central
	found io.delta#delta-storage;2.3.0 in central
	found org.antlr#antlr4-runtime;4.8 in central
:: resolution report :: resolve 75ms :: artifacts dl 5ms
	:: modules in use:
	io.delta#delta-core_2.12;2.3.0 from central in [default]
	io.delta#delta-storage;2.3.0 from central in [default]
	org.antlr#antlr4-runtime;4.8 from central in [default]
	---------------------------------------------------------------------
	|                  |            modules            ||   artifacts   |
	|       conf       | number| search|dwnlded|evicted|| number|dwnlded|
	---------------------------------------------------------------------
	|      default     |   3   

25/07/28 14:31:15 WARN Utils: Service 'SparkUI' could not bind on port 4040. Attempting port 4041.
25/07/28 14:31:18 WARN package: Truncated the string representation of a plan since it was too large. This behavior can be adjusted by setting 'spark.sql.debug.maxToStringFields'.


                                                                                

25/07/28 14:31:19 WARN MemoryManager: Total allocation exceeds 95,00% (1.020.054.720 bytes) of heap memory
Scaling row group sizes to 95,00% for 8 writers


                                                                                

root
 |-- InvoiceNo: string (nullable = true)
 |-- StockCode: string (nullable = true)
 |-- Description: string (nullable = true)
 |-- Quantity: integer (nullable = true)
 |-- InvoiceDate: timestamp (nullable = true)
 |-- UnitPrice: double (nullable = true)
 |-- CustomerID: string (nullable = true)
 |-- Country: string (nullable = true)

+---------+---------+--------------------+--------+-------------------+---------+----------+--------------+
|InvoiceNo|StockCode|         Description|Quantity|        InvoiceDate|UnitPrice|CustomerID|       Country|
+---------+---------+--------------------+--------+-------------------+---------+----------+--------------+
|   549169|    21034|REX CASH+CARRY JU...|       3|2011-04-06 17:25:00|     0.95|   17906.0|United Kingdom|
|   549169|    23009|I LOVE LONDON BAB...|       1|2011-04-06 17:25:00|    16.95|   17906.0|United Kingdom|
|   549169|   85027L|FRENCH CHATEAU LA...|       4|2011-04-06 17:25:00|     1.95|   17906.0|United Kingdom|
|   549169| 