In [0]:
import os

from pyspark.sql.functions import year, month, col
from pyspark.sql.types import StructType, StructField, IntegerType, FloatType, StringType, TimestampType, DateType

# --------------------------------------------
# 1. Bronze and Silver file paths
# --------------------------------------------
# Both layers are addressed through the same ADLS Gen2 account host. It is
# spelled once here so the two URIs cannot drift apart through a typo.
ADLS_ACCOUNT_HOST = "kivastorageacc2.dfs.core.windows.net"

# URI layout: abfss://<container>@<account host>/<path inside container>
# NOTE(review): the container names use different conventions ("kivabronze"
# vs "kiva-silver") — kept exactly as they were; confirm both exist as written.
bronze_base_path = f"abfss://kivabronze@{ADLS_ACCOUNT_HOST}/"
silver_base_path = f"abfss://kiva-silver@{ADLS_ACCOUNT_HOST}/kiva-data-clean"



In [0]:
# Authenticate Spark against the storage account with its shared account key.
# The key is read from the environment instead of being pasted into the
# notebook, so it never ends up in version control or in exported copies.
# NOTE(review): if this runs on Databricks, a secret scope read through
# dbutils.secrets.get is the usual alternative — adapt to however this
# workspace stores credentials.
storage_account_name = "kivastorageacc2"
storage_account_key = os.environ.get("AZURE_STORAGE_KEY")

# Fail here with a clear message rather than later with an opaque
# authentication error on the first read.
if not storage_account_key:
    raise RuntimeError(
        "AZURE_STORAGE_KEY is not set; export the storage account key "
        "before running this notebook."
    )

spark.conf.set(
    f"fs.azure.account.key.{storage_account_name}.dfs.core.windows.net",
    storage_account_key,
)

In [0]:
# --------------------------------------------
# 2. Read Bronze Data
# --------------------------------------------
# The reader is configured with recursiveFileLookup so that Parquet files in
# sub-folders of the bronze root are picked up as well, then pointed at the
# bronze base path.
bronze_reader = spark.read.option("recursiveFileLookup", "true")
df_bronze = bronze_reader.parquet(bronze_base_path)


In [0]:

# --------------------------------------------
# 3. Data preparation
# --------------------------------------------
# Rows missing any of these columns are dropped. "date" is part of the list
# because year/month are derived from it below and are later used as the
# partition keys of the Silver write; a null date would otherwise yield null
# partition values.
critical_columns = ["id", "loan_amount", "country", "posted_time", "date"]

df_cleaned = (
    df_bronze
    .dropna(subset=critical_columns)                  # drop rows with critical nulls
    .fillna({"region": "Unknown", "partner_id": -1})  # fill non-critical nulls with sentinels
    .withColumn("year", year("date"))                 # partition key: calendar year of "date"
    .withColumn("month", month("date"))               # partition key: calendar month of "date"
)


In [0]:

# --------------------------------------------
# 4. Write to Silver Layer
# --------------------------------------------
# The writer runs in "overwrite" mode and partitions the Parquet output by the
# derived year and month columns before writing to the Silver base path.
silver_writer = df_cleaned.write.mode("overwrite").partitionBy("year", "month")
silver_writer.parquet(silver_base_path)

# Completion message (Turkish): "Cleaned data was successfully written to the Silver layer."
print("Temizlenmiş veri başarıyla Silver katmanına yazıldı.")

Temizlenmiş veri başarıyla Silver katmanına yazıldı.
