## 1. Pivot

In [0]:
from pyspark.sql import SparkSession

# Obtain the Spark session for this notebook (created on first use).
spark = (
    SparkSession.builder
    .appName("PivotExample")
    .getOrCreate()
)

# Long-format sample data: one (product, month, sales) record per row.
# Not every product has a figure for every month.
columns = ["product", "month", "sales"]
data = [
    ("Product A", "Jan", 100),
    ("Product A", "Feb", 150),
    ("Product B", "Jan", 200),
    ("Product B", "Feb", 180),
    ("Product C", "Mar", 220),
    ("Product A", "Mar", 120),
]

df = spark.createDataFrame(data, schema=columns)
display(df)

In [0]:
from pyspark.sql.functions import first
# Turn each distinct month into its own column, one row per product.
# Sales are summed per (product, month): unlike first(), which keeps an
# arbitrary row's value when a pair occurs more than once, sum() is
# deterministic, and it yields the same numbers when each pair is unique.
# No value list is passed to pivot(), so the month columns come from the data.
pivot_df = df.groupBy("product").pivot("month").sum("sales")
display(pivot_df)

In [0]:
# Pivot on an explicit list of months: only Jan and Feb become columns,
# in the order given; rows for any other month do not contribute.
# sum() is used instead of first() so that repeated (product, month) rows
# are aggregated deterministically rather than one being picked arbitrarily.
pivot_df_limited = (
    df.groupBy("product")
    .pivot("month", ["Jan", "Feb"])
    .sum("sales")
)
display(pivot_df_limited)

## 2. Unpivot

In [0]:
# Unpivot with the SQL stack() generator: each wide row is expanded into one
# (month, sales) row per month column. Month cells that are null in pivot_df
# come back as rows with a null `sales`.
month_cols = ["Jan", "Feb", "Mar"]
label_value_pairs = ", ".join(f"'{m}', {m}" for m in month_cols)
stack_expr = f"stack({len(month_cols)}, {label_value_pairs}) as (month, sales)"

unpivot_df = pivot_df.selectExpr("product", stack_expr)
display(unpivot_df)

In [0]:
from pyspark.sql.functions import array, struct, col, explode, lit

# Alternative unpivot using only DataFrame functions: wrap every month column
# in a (month, sales) struct, collect the structs into an array, explode the
# array into one row per struct, then flatten the struct fields into columns.
month_structs = [
    struct(lit(month_name).alias("month"), col(month_name).alias("sales"))
    for month_name in ("Jan", "Feb", "Mar")
]

unpivot_df_alt = (
    pivot_df
    .select(col("product"), explode(array(*month_structs)).alias("month_sales"))
    .select("product", "month_sales.month", "month_sales.sales")
)

display(unpivot_df_alt)

In [0]:
# Stack the results of both unpivot approaches to view them together.
# unionByName matches columns by name rather than by position, so a change in
# column order in either frame cannot silently mix up fields. Duplicates are
# kept, so every row appears once per approach.
unpivot_df_alt.unionByName(unpivot_df).show()