- **Schema Enforcement** - When writing data into a table/file, ensure that the incoming data conforms to the existing schema/format (column names and data types) of the target table/file

In [0]:
from pyspark.sql import Row

# 1. Define the Delta path
delta_path = "/Volumes/inceptez_catalog/inputdb/moviesdata/movies_delta"

# 2. Create initial DataFrame (column types are inferred from the Row values)
base_data = [
    Row(Title="Inception", Release_Year=2010, Rating=8.8),
    Row(Title="Interstellar", Release_Year=2014, Rating=8.6)
]
df_base = spark.createDataFrame(base_data)

# 3. Write the base rows as a Delta table.
# The format is stated explicitly instead of relying on the session's default
# data source, so the table is Delta regardless of how the cluster is configured;
# the later cells read and append with format("delta") as well.
df_base.write.format("delta").mode("overwrite").save(delta_path)
print("Created initial Delta table")

In [0]:
# Load the Delta table back from its path and render the rows.
print("Initial Data:")
delta_snapshot_df = spark.read.format("delta").load(delta_path)
delta_snapshot_df.display()

In [0]:
# Query the same location through SQL, addressing the table by path.
select_all_query = f"select * from delta.`{delta_path}`"
spark.sql(select_all_query).display()


In [0]:
# Print the column names and types of the path-addressed Delta table.
schema_probe_df = spark.sql(f"select * from delta.`{delta_path}`")
schema_probe_df.printSchema()

In [0]:
# Build a row that does not match the base rows written earlier:
# the column is named Movie_Title instead of Title, and Release_Year / Rating
# are strings instead of numbers.
bad_data = [
    Row(Movie_Title="Tenet", Release_Year="2020", Rating="7.5")  # wrong column + wrong type
]
df_bad = spark.createDataFrame(bad_data)
try:
    df_bad.write.format("delta").mode("append").save(delta_path)
except Exception as e:
    # Print the actual error so the reader can see why the append was rejected;
    # previously the exception was caught but its message was discarded.
    print("Schema enforcement triggered:\n")
    print(e)
else:
    # Make the unexpected outcome visible instead of finishing silently.
    print("Append succeeded - schema enforcement was NOT triggered")


In [0]:
# Write the same base rows out in CSV format, replacing any previous output.
(
    df_base.write
    .mode("overwrite")
    .format("csv")
    .save("/Volumes/inceptez_catalog/inputdb/moviesdata/movies_csv")
)
print("Created csv file")

In [0]:
# A row whose column name and values differ from the base rows already stored as CSV.
bad_data = [Row(Movie_Title="Tenet", Release_Year="Not Rated", Rating="NA")]  # wrong column + wrong type
df_bad = spark.createDataFrame(bad_data)

# Append the mismatching row to the existing CSV directory; unlike the Delta
# append, this write is not guarded by try/except.
(
    df_bad.write
    .mode("append")
    .format("csv")
    .save("/Volumes/inceptez_catalog/inputdb/moviesdata/movies_csv")
)
print("Data appended")


In [0]:
%sql
-- Read the CSV directory back by path to inspect the rows written by the earlier CSV cells.
select * from csv.`/Volumes/inceptez_catalog/inputdb/moviesdata/movies_csv`;