In [0]:
from pyspark.sql.functions import *
from pyspark.sql.window import Window

# Sample employee rows in the order given by `cols` below.
# join_date values are plain strings here (no date cast is applied in this cell),
# and employee 5 has None for both salary and city.
data = [
    (1, "Amit Sharma",     "IT", 70000, "2025-01-10", "Pune"),
    (2, "Neha Singh",      "HR", 45000, "2026-03-15", "Mumbai"),
    (3, "Priya Patel",     "IT", 80000, "2023-01-10", "Pune"),
    (4, "Bhavesh Patel",   "IT", 90000, "2024-01-10", "Mumbai"),
    (5, "Amit Patel",      "IT", None,  "2025-01-10", None),
    (6, "Bhushan Sonsale", "HR", 70000, "2023-01-10", "Bengaluru"),
]

# Column names only; the column types are inferred by Spark from the tuples.
cols = ["emp_id", "name", "dept", "salary", "join_date", "city"]

# Build the base employee DataFrame and print it.
df = spark.createDataFrame(data=data, schema=cols)
df.show()

In [0]:
# Project only the identifying columns (passed as a single list) and print them.
df.select(["emp_id", "name", "dept"]).show()

In [0]:
# Derive the yearly salary (salary * 12) with a Column expression instead of a SQL string.
df.select("emp_id", (col("salary") * 12).alias("annual_salary")).show()

In [0]:
# Add `salary_increase`, which holds the salary plus a flat 5000 (not the delta itself).
df = df.withColumn("salary_increase", df["salary"] + 5000)
df.display()

In [0]:
# Rename `dept` to `department`; later cells group, join and partition on the new name.
df = df.withColumnRenamed(existing="dept", new="department")
df.display()

In [0]:
# Remove the `city` column from the working DataFrame.
df = df.drop("city")

In [0]:
# Keep rows whose salary is strictly greater than 50000 (SQL-string predicate).
df.where("salary > 50000").show()

In [0]:
# Assign a salary grade:
#   salary >= 70000 -> "A"
#   salary >= 50000 -> "B"
#   otherwise       -> "C"
# A NULL salary makes both comparisons evaluate to NULL, so without the explicit
# first branch such rows would fall through to otherwise("C") and an unknown
# salary would be graded as the lowest band. The grade is left NULL instead.
df = df.withColumn(
    "grade",
    when(col("salary").isNull(), lit(None).cast("string"))
    .when(col("salary") >= 70000, "A")
    .when(col("salary") >= 50000, "B")
    .otherwise("C"),
)

In [0]:
# Show the rows where salary is missing.
df.where("salary IS NULL").show()

In [0]:
# Salary within the closed range [40000, 80000]; both bounds are inclusive.
df.where((col("salary") >= 40000) & (col("salary") <= 80000)).show()

In [0]:
# Number of rows per department, written as an explicit aggregate.
df.groupBy("department").agg(count("*").alias("count")).show()

In [0]:
# Per-department salary statistics (total, mean, maximum), expressed as SQL
# aggregate expressions.
df.groupBy("department").agg(
    expr("sum(salary) AS total_salary"),
    expr("avg(salary) AS avg_salary"),
    expr("max(salary) AS max_salary"),
).show()

In [0]:
# De-duplicate on the (emp_id, name) pair; one row is kept per distinct pair.
df = df.drop_duplicates(subset=["emp_id", "name"])

In [0]:
# List employees from highest to lowest salary.
df.sort("salary", ascending=False).show()

In [0]:
# Small lookup table mapping a department code to its description.
# "Finance" has no matching employee row in the sample data defined in this notebook.
dept_df = spark.createDataFrame(
    data=[
        ("IT", "Tech"),
        ("HR", "Human Resource"),
        ("Finance", "Accounts"),
    ],
    schema=["department", "dept_desc"],
)
dept_df.show()

In [0]:
# Left join: every employee row is kept and `dept_desc` is attached where the
# department code matches a row in dept_df.
df = df.join(other=dept_df, on="department", how="left")
df.show()

In [0]:
# Render the current DataFrame through its display() method.
# NOTE(review): df is unchanged since the preceding cell's df.show(), so this
# shows the same rows again — confirm whether this cell is still needed.
df.display()

In [0]:
# Rank employees inside each department by salary, highest first.
#
# rank() and dense_rank() use `window_spec`, ordered by salary only, so equal
# salaries share a rank. row_number() must give each row a distinct number;
# ordered by salary alone, the numbering of tied rows is arbitrary and can
# change between runs. It therefore gets its own window with emp_id as a
# tie-breaker (emp_id is unique in this notebook's sample data).
salary_desc = col("salary").desc()

window_spec = Window.partitionBy("department").orderBy(salary_desc)
row_num_window_spec = Window.partitionBy("department").orderBy(
    salary_desc, col("emp_id").asc()
)

df = (
    df.withColumn("row_num", row_number().over(row_num_window_spec))
    .withColumn("rank", rank().over(window_spec))
    .withColumn("dense_rank", dense_rank().over(window_spec))
)
df.display()