In [1]:
from pyspark.sql import SparkSession
from pyspark.sql import functions as F
from pyspark.sql.window import Window

spark = SparkSession.builder.appName("Window_Lab").getOrCreate()

data = [
    ("iPhone", "Mobile", 1000),
    ("Samsung", "Mobile", 800),
    ("Xiaomi", "Mobile", 800), # –¶–µ–Ω–∞ —Å–æ–≤–ø–∞–¥–∞–µ—Ç —Å Samsung!
    ("Nokia", "Mobile", 300),
    ("MacBook", "Laptop", 2000),
    ("Dell", "Laptop", 1500),
    ("HP", "Laptop", 1200)
]

columns = ["product", "category", "price"]

df = spark.createDataFrame(data, columns)

df.show()

+-------+--------+-----+
|product|category|price|
+-------+--------+-----+
| iPhone|  Mobile| 1000|
|Samsung|  Mobile|  800|
| Xiaomi|  Mobile|  800|
|  Nokia|  Mobile|  300|
|MacBook|  Laptop| 2000|
|   Dell|  Laptop| 1500|
|     HP|  Laptop| 1200|
+-------+--------+-----+



### –®–∞–≥ 2: –†–∞–Ω–∂–∏—Ä–æ–≤–∞–Ω–∏–µ (Ranking) üèÜ  
–ó–∞–¥–∞—á–∞: –ù–∞–π—Ç–∏ —Å–∞–º—ã–µ –¥–æ—Ä–æ–≥–∏–µ —Ç–æ–≤–∞—Ä—ã –≤ –∫–∞–∂–¥–æ–π –∫–∞—Ç–µ–≥–æ—Ä–∏–∏.

In [2]:
# 1. –°–æ–∑–¥–∞–µ–º —Å–ø–µ—Ü–∏—Ñ–∏–∫–∞—Ü–∏—é –æ–∫–Ω–∞
# –†–∞–∑–±–∏–≤–∞–µ–º –ø–æ –∫–∞—Ç–µ–≥–æ—Ä–∏–∏, —Å–æ—Ä—Ç–∏—Ä—É–µ–º –ø–æ —Ü–µ–Ω–µ –£–ë–´–í–ê–ù–ò–Æ (desc)
window_spec_rank = Window.partitionBy("category").orderBy(F.col("price").desc())

# 2. –ü—Ä–∏–º–µ–Ω—è–µ–º —Ñ—É–Ω–∫—Ü–∏–∏
df_ranked = df \
        .withColumn("row_number", F.row_number().over(window_spec_rank)) \
        .withColumn("rank", F.rank().over(window_spec_rank)) \
        .withColumn("dense_rank", F.dense_rank().over(window_spec_rank))

df_ranked.show()



+-------+--------+-----+----------+----+----------+
|product|category|price|row_number|rank|dense_rank|
+-------+--------+-----+----------+----+----------+
|MacBook|  Laptop| 2000|         1|   1|         1|
|   Dell|  Laptop| 1500|         2|   2|         2|
|     HP|  Laptop| 1200|         3|   3|         3|
| iPhone|  Mobile| 1000|         1|   1|         1|
|Samsung|  Mobile|  800|         2|   2|         2|
| Xiaomi|  Mobile|  800|         3|   2|         2|
|  Nokia|  Mobile|  300|         4|   4|         3|
+-------+--------+-----+----------+----+----------+



### –®–∞–≥ 3: –°—Ä–∞–≤–Ω–µ–Ω–∏–µ —Å –ø—Ä–æ—à–ª—ã–º (Lag) üìâ
–ó–∞–¥–∞—á–∞: –ü–æ—Å—á–∏—Ç–∞—Ç—å —Ä–∞–∑–Ω–∏—Ü—É –≤ —Ü–µ–Ω–µ –º–µ–∂–¥—É —Ç–µ–∫—É—â–∏–º —Ç–æ–≤–∞—Ä–æ–º –∏ —Å–ª–µ–¥—É—é—â–∏–º –ø–æ –¥–µ—à–µ–≤–∏–∑–Ω–µ.

In [4]:
# –°–æ—Ä—Ç–∏—Ä—É–µ–º –ø–æ —Ü–µ–Ω–µ (–æ—Ç –¥–µ—à–µ–≤—ã—Ö –∫ –¥–æ—Ä–æ–≥–∏–º)
windows_spec_diff = Window.partitionBy(F.col("category")).orderBy(F.col("price"))

df_diff = df \
            .withColumn("prev_price", F.lag("price", 1).over(window_spec_rank)) \
            .withColumn("price_diff", F.col("price") - F.col("prev_price"))

df_diff.show()
# –ü–µ—Ä–≤–∞—è —Å—Ç—Ä–æ–∫–∞ –≤ –≥—Ä—É–ø–ø–µ –≤—Å–µ–≥–¥–∞ –±—É–¥–µ—Ç –∏–º–µ—Ç—å NULL –≤ prev_price –∏ diff.

+-------+--------+-----+----------+----------+
|product|category|price|prev_price|price_diff|
+-------+--------+-----+----------+----------+
|MacBook|  Laptop| 2000|      NULL|      NULL|
|   Dell|  Laptop| 1500|      2000|      -500|
|     HP|  Laptop| 1200|      1500|      -300|
| iPhone|  Mobile| 1000|      NULL|      NULL|
|Samsung|  Mobile|  800|      1000|      -200|
| Xiaomi|  Mobile|  800|       800|         0|
|  Nokia|  Mobile|  300|       800|      -500|
+-------+--------+-----+----------+----------+



### –®–∞–≥ 4: –ù–∞–∫–æ–ø–∏—Ç–µ–ª—å–Ω—ã–π –∏—Ç–æ–≥ (Running Total) üìà
–ó–∞–¥–∞—á–∞: –ü–æ—Å—á–∏—Ç–∞—Ç—å —Å—É–º–º—É —Ü–µ–Ω –Ω–∞–∫–æ–ø–∏—Ç–µ–ª—å–Ω—ã–º –∏—Ç–æ–≥–æ–º (–Ω–∞–ø—Ä–∏–º–µ—Ä, –¥–ª—è –∫–æ–Ω—Ç—Ä–æ–ª—è –±—é–¥–∂–µ—Ç–∞).

In [5]:
# rowsBetween —É–∫–∞–∑—ã–≤–∞–µ—Ç –≥—Ä–∞–Ω–∏—Ü—ã —Ä–∞–º–∫–∏. 
# Window.unboundedPreceding - –æ—Ç —Å–∞–º–æ–≥–æ –Ω–∞—á–∞–ª–∞ —Ä–∞–∑–¥–µ–ª–∞
# Window.currentRow - –¥–æ —Ç–µ–∫—É—â–µ–π —Å—Ç—Ä–æ–∫–∏

window_cum = Window.partitionBy(F.col("category")).orderBy(F.col("price")) \
                .rowsBetween(Window.unboundedPreceding, Window.currentRow)

df_cum = df.withColumn("running_total", F.sum(F.col("price")).over(window_cum))

df_cum.show()



+-------+--------+-----+-------------+
|product|category|price|running_total|
+-------+--------+-----+-------------+
|     HP|  Laptop| 1200|         1200|
|   Dell|  Laptop| 1500|         2700|
|MacBook|  Laptop| 2000|         4700|
|  Nokia|  Mobile|  300|          300|
|Samsung|  Mobile|  800|         1100|
| Xiaomi|  Mobile|  800|         1900|
| iPhone|  Mobile| 1000|         2900|
+-------+--------+-----+-------------+



In [6]:
spark.stop()