In [4]:
import os
import sys
from pyspark.sql import SparkSession, Window
from pyspark.sql import functions as f
from pyspark.sql.types import *

# Point both the PySpark workers and the driver at the interpreter running
# this notebook, so they cannot end up on mismatched Python installations.
os.environ.update(
    PYSPARK_PYTHON=sys.executable,
    PYSPARK_DRIVER_PYTHON=sys.executable,
)

# Build (or reuse, if one already exists in this kernel) a Spark session
# with Hive support enabled, using the local master URL "local[3]".
spark = (
    SparkSession.builder
    .appName("Window function Demo")
    .master("local[3]")
    .enableHiveSupport()
    .getOrCreate()
)

In [5]:
# Load the summary dataset from the Parquet file next to this notebook.
summary_df = spark.read.parquet("summary.parquet")

# Preview the first five rows to confirm the columns look as expected.
summary_df.show(5)

+---------+----------+-----------+-------------+------------+
|  Country|WeekNumber|NumInvoices|TotalQuantity|InvoiceValue|
+---------+----------+-----------+-------------+------------+
|    Spain|        49|          1|           67|      174.72|
|  Germany|        48|         11|         1795|     3309.75|
|Lithuania|        48|          3|          622|     1598.06|
|  Germany|        49|         12|         1852|     4521.39|
|  Bahrain|        51|          1|           54|      205.74|
+---------+----------+-----------+-------------+------------+
only showing top 5 rows



In [6]:
# Window spec for a per-country running total: rows are grouped by Country,
# ordered by WeekNumber, and the frame spans from the first row of the
# partition up to and including the current row.
running_total_window = (
    Window.partitionBy("Country")
    .orderBy("WeekNumber")
    .rowsBetween(Window.unboundedPreceding, Window.currentRow)
)

In [7]:
# Add a cumulative InvoiceValue per country (summed over the running-total
# window) as a new "RunningTotal" column and display the result.
(
    summary_df
    .withColumn(
        "RunningTotal",
        f.sum("InvoiceValue").over(running_total_window),
    )
    .show()
)

+---------------+----------+-----------+-------------+------------+------------------+
|        Country|WeekNumber|NumInvoices|TotalQuantity|InvoiceValue|      RunningTotal|
+---------------+----------+-----------+-------------+------------+------------------+
|      Australia|        48|          1|          107|      358.25|            358.25|
|      Australia|        49|          1|          214|       258.9|            617.15|
|      Australia|        50|          2|          133|      387.95|1005.0999999999999|
|        Austria|        50|          2|            3|      257.04|            257.04|
|        Bahrain|        51|          1|           54|      205.74|            205.74|
|        Belgium|        48|          1|          528|       346.1|             346.1|
|        Belgium|        50|          2|          285|      625.16|            971.26|
|        Belgium|        51|          2|          942|      838.65|1809.9099999999999|
|Channel Islands|        49|          1|   