# Writing Parquet Files with PySpark
This notebook demonstrates how to create a DataFrame and write it as Parquet files using PySpark.

In [None]:
# Step 1: Start (or reuse) the Spark session used by every later cell.
from pyspark.sql import SparkSession

# getOrCreate() returns the already-running session if there is one,
# so re-running this cell does not start a second session.
spark = (
    SparkSession.builder
    .appName("Write Parquet Example")
    .getOrCreate()
)

## Create a Sample DataFrame
We create a simple DataFrame with sample data.

In [None]:
# Column names only: no types are declared here, so Spark infers them
# from the Python values in `data`.
columns = ["id", "name", "age"]

# One tuple per row, in the same order as `columns`.
data = [(1, "Alice", 25), (2, "Bob", 30), (3, "Charlie", 28)]

df = spark.createDataFrame(data, schema=columns)

# Print the rows as a text table to confirm the DataFrame contents.
df.show()

## Write DataFrame to Parquet
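This writes the DataFrame to disk in Parquet format. The target path is a directory (Spark writes one or more part files into it), and `mode="overwrite"` replaces any output that already exists there.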

In [None]:
# Overwrite mode replaces any existing output at the target path,
# which keeps this cell safe to re-run.
df.write.mode("overwrite").parquet("/tmp/output/parquet_data")

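## Optional: Partition and Compress Parquet Output
Partitioning by `age` writes one subdirectory per distinct value (for example `age=25/`), and the `compression` option sets the Parquet codec explicitly.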

In [None]:
# Same write as a plain Parquet export, but split into one subdirectory
# per distinct `age` value and with the compression codec set explicitly.
(
    df.write
    .mode("overwrite")
    .partitionBy("age")
    .option("compression", "snappy")
    .parquet("/tmp/output/parquet_partitioned")
)

## Read Parquet Back into DataFrame

In [None]:
# Read the partitioned output written by the previous cell.
# NOTE(review): with partition discovery, `age` is rebuilt from the directory
# names, so its column position/type may differ from the original `df` — confirm.
partitioned_path = "/tmp/output/parquet_partitioned"
df_loaded = spark.read.format("parquet").load(partitioned_path)

# Print the rows that were read back.
df_loaded.show()