In [1]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import avg, col
from pyspark.sql.window import Window
from pyspark.sql.functions import regexp_extract

def rollingAverage3h(file_path):
    df = spark.read.csv(file_path, header=True, inferSchema=True).select("MTU", "Wind Onshore  - Actual Aggregated [MW]")

    # just display the end time of the time frame
    df_with_time = df.withColumn("End_Time", regexp_extract(df["MTU"], r"(\d{2}\.\d{2}\.\d{4} \d{2}:\d{2} \(CET/CEST\))", 1))

    # 3h sliding window average
    windowSpec = Window.orderBy("End_Time").rowsBetween(-2, 0)

    df_with_avg = df_with_time.withColumn("Rolling Average 3h in MWh", avg("Wind Onshore  - Actual Aggregated [MW]").over(windowSpec))

    df_clean = df_with_avg.select(
        col("End_Time"), 
        col("Rolling Average 3h in MWh")
    )
    
    return df_clean

spark = SparkSession.builder.appName("ComputeAverageForEveryRow").getOrCreate()

df_average2021 = rollingAverage3h("data/production/SE1Onshore2021UTC.csv")

df_average2021.show()

df_average2022 = rollingAverage3h("data/production/SE1Onshore2022UTC.csv")

df_average2022.show()



23/10/07 17:35:14 WARN Utils: Your hostname, LeonsLaptop resolves to a loopback address: 127.0.1.1; using 192.168.182.132 instead (on interface wlp3s0)
23/10/07 17:35:14 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
23/10/07 17:35:15 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
23/10/07 17:35:23 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
23/10/07 17:35:23 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
23/10/07 17:35:23 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degra

+--------+-------------------------+
|End_Time|Rolling Average 3h in MWh|
+--------+-------------------------+
|        |                    116.0|
|        |                    113.0|
|        |       112.66666666666667|
|        |       118.66666666666667|
|        |       136.66666666666666|
|        |       164.66666666666666|
|        |       173.33333333333334|
|        |       174.66666666666666|
|        |       167.33333333333334|
|        |       166.66666666666666|
|        |       167.33333333333334|
|        |       161.66666666666666|
|        |                    151.0|
|        |                    135.0|
|        |       122.66666666666667|
|        |       120.66666666666667|
|        |                    127.0|
|        |       133.33333333333334|
|        |                    132.0|
|        |       124.33333333333333|
+--------+-------------------------+
only showing top 20 rows



23/10/07 17:35:24 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
23/10/07 17:35:24 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
23/10/07 17:35:24 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.


+--------+-------------------------+
|End_Time|Rolling Average 3h in MWh|
+--------+-------------------------+
|        |                   1223.0|
|        |                   1234.0|
|        |       1243.6666666666667|
|        |       1259.3333333333333|
|        |       1272.3333333333333|
|        |                   1275.0|
|        |       1269.3333333333333|
|        |                   1271.0|
|        |                   1270.0|
|        |       1260.3333333333333|
|        |                   1206.0|
|        |       1126.3333333333333|
|        |                   1054.0|
|        |                    999.0|
|        |                    961.0|
|        |        899.6666666666666|
|        |        804.3333333333334|
|        |        682.6666666666666|
|        |                    544.0|
|        |        411.6666666666667|
+--------+-------------------------+
only showing top 20 rows



23/10/07 17:35:25 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
23/10/07 17:35:25 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
