# PySpark DataFrame API Commands for Interviews

This notebook demonstrates basic and advanced examples of the PySpark DataFrame API.

## 1. Initialize Spark

In [None]:
from pyspark.sql import SparkSession

# Reuse the active session if one exists; otherwise start one named 'df-api-demo'.
spark = (
    SparkSession.builder
    .appName('df-api-demo')
    .getOrCreate()
)

## 2. Create a DataFrame

In [None]:
# Sample rows, one tuple per record, in the order given by `schema` below.
data = [
    (1, 'Alice', 5),
    (2, 'Bob', 3),
    (3, 'Carol', 4),
]
# Only column names are supplied here; Spark infers the column types from the data.
schema = ['id', 'name', 'score']
df = spark.createDataFrame(data, schema=schema)

## 3. Select and Filter

In [None]:
# Keep rows with score > 3, then project down to `id` and `name`.
# The filter runs first so that `score` is still a column of the DataFrame
# being filtered, instead of relying on Spark to resolve a column that the
# projection has already dropped.
df_filtered = df.filter(df.score > 3).select('id', 'name')

## 4. Aggregation

In [None]:
# Average score per name; the result column is named `avg(score)`.
agg_df = df.groupBy('name').avg('score')

## 5. Join DataFrames

In [None]:
# Second DataFrame pairing each id with a state code.
df2 = spark.createDataFrame(
    [(1, 'NY'), (2, 'CA'), (3, 'TX')],
    schema=['id', 'state'],
)
# Joining on the column name (not an expression) keeps a single `id` column in the output.
joined_df = df.join(df2, on='id', how='inner')

## 6. Window Functions

In [None]:
from pyspark.sql.window import Window
from pyspark.sql.functions import row_number, desc

# Number rows from highest to lowest score. `id` is a secondary sort key so
# that rows with equal scores receive the same row numbers on every run;
# ordering by score alone leaves ties in an unspecified order.
# NOTE: a window without partitionBy() moves all rows into a single partition,
# which is fine for this small demo but does not scale to large data.
window_spec = Window.orderBy(desc('score'), 'id')
ranked_df = df.withColumn('rank', row_number().over(window_spec))

## 7. Advanced Transformations

In [None]:
from pyspark.sql.functions import when

# Add a `grade` column: 'high' when score is greater than 4, otherwise 'low'.
df_adv = df.withColumn(
    'grade',
    when(df['score'] > 4, 'high').otherwise('low'),
)

## 8. Write Data as Delta

In [None]:
# Write `df` in Delta format, replacing any data already at the target path.
# NOTE(review): assumes the Spark session has Delta Lake support available — confirm.
df.write.save('/path/to/delta-table', format='delta', mode='overwrite')