In [0]:
from pyspark.sql import functions as F

# Load the current bronze listings table; this DataFrame is the input to the
# cleanup steps that follow.
bronze_df = spark.table("airbnb_bronze.listings_raw")

# Row count per distinct `city` value, shown before any normalization so the
# effect of the cleanup can be compared afterwards.
city_counts_before = bronze_df.groupBy("city").count().orderBy("city")

print("Before cleanup:")
display(city_counts_before)

# Normalize the spelling of `city`: any case variant of "paris" / "venice"
# is rewritten to its canonical capitalized form; every other value is kept
# unchanged. The result is cast to string.
lowered_city = F.lower("city")
normalized_city = (
    F.when(lowered_city == "paris", F.lit("Paris"))
    .when(lowered_city == "venice", F.lit("Venice"))
    .otherwise(F.col("city"))
    .cast("string")
)

bronze_cleaned = bronze_df.withColumn("city", normalized_city)


# Drop rows that are exact duplicates across all columns.
bronze_dedup = bronze_cleaned.dropDuplicates()

# Replace the bronze table with the cleaned data, stored as Delta and
# partitioned by the normalized `city` column.
# NOTE(review): the target is the same table `bronze_df` was read from, so
# this overwrites the source in place -- confirm that is intended.
# NOTE(review): `overwriteSchema` is presumably set so the table layout /
# partitioning may be replaced along with the data -- confirm it is required.
bronze_writer = bronze_dedup.write.format("delta").mode("overwrite")
bronze_writer = bronze_writer.option("overwriteSchema", "true")
bronze_writer = bronze_writer.partitionBy("city")
bronze_writer.saveAsTable("airbnb_bronze.listings_raw")

# Report completion. The check-mark was previously stored as mojibake
# (UTF-8 bytes decoded as cp1252); it is restored to the intended character.
print("✅ Bronze table cleaned and normalized.")

# Re-read the table (rather than reusing an in-memory DataFrame) and show the
# per-city row counts of the current table contents.
print("New partition breakdown:")
display(
    spark.table("airbnb_bronze.listings_raw")
         .groupBy("city")
         .count()
         .orderBy("city")
)
