In [None]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import split, explode, window, current_timestamp

# Create (or reuse) the SparkSession for this example.
spark = SparkSession.builder \
    .appName("SlidingWindowExample") \
    .getOrCreate()

# Read streaming text from a TCP socket on localhost:9999. The text of each
# row is accessed below through the `value` column.
lines = spark.readStream \
    .format("socket") \
    .option("host", "localhost") \
    .option("port", 9999) \
    .load()

# Split every line on single spaces into one row per word, and tag each row
# with current_timestamp(): this is a processing-time stamp generated by Spark,
# not an event time carried in the incoming data.
words = lines.select(
    explode(split(lines.value, " ")).alias("word"),
    current_timestamp().alias("timestamp"),
)

# Count words per sliding window: 10-second windows that start every 5 seconds,
# so each timestamp falls into two overlapping windows.
windowedCounts = words \
    .groupBy(window(words.timestamp, "10 seconds", "5 seconds"), words.word) \
    .count()

# Print the full (complete-mode) result table to the console on every trigger,
# without truncating the window column.
query = windowedCounts \
    .writeStream \
    .outputMode("complete") \
    .format("console") \
    .option("truncate", False) \
    .start()

# Block until the stream terminates. The cleanup lives in `finally` so that an
# interrupted kernel (KeyboardInterrupt) or a failing query still stops the
# streaming query and the session instead of leaving them running.
try:
    query.awaitTermination()
finally:
    try:
        query.stop()
    finally:
        # Stop the SparkSession even if stopping the query itself raised.
        spark.stop()


In [2]:
## Stock


In [None]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import split, explode, window, current_timestamp, col
from pyspark.sql.types import IntegerType

# Create (or reuse) the SparkSession for this example.
spark = SparkSession.builder \
    .appName("SlidingWindowExample") \
    .getOrCreate()

# Read streaming text from a TCP socket on localhost:9999. The text of each
# row is accessed below through the `value` column.
lines = spark.readStream \
    .format("socket") \
    .option("host", "localhost") \
    .option("port", 9999) \
    .load()

# Split every line on single spaces into one row per token, and tag each row
# with current_timestamp(): this is a processing-time stamp generated by Spark,
# not an event time carried in the incoming data.
words = lines.select(
    explode(split(lines.value, " ")).alias("word"),
    current_timestamp().alias("timestamp"),
)
# Interpret each token as an integer so it can be averaged.
# NOTE(review): non-numeric or empty tokens presumably cast to null and are then
# skipped by avg -- confirm for the Spark version / ANSI setting in use.
words = words.withColumn("word", col("word").cast(IntegerType()))

# Average the values per sliding window: 10-second windows with a slide
# duration of 5 seconds (each timestamp falls into two overlapping windows).
# The dict-style agg names the result column "avg(word)".
windowedCounts = words \
    .groupBy(window(words.timestamp, "10 seconds", "5 seconds")) \
    .agg({"word": "avg"})

# Print the full (complete-mode) table of windowed averages to the console on
# every trigger, without truncating the window column.
query1 = windowedCounts \
    .writeStream \
    .outputMode("complete") \
    .format("console") \
    .option("truncate", False) \
    .start()

# Optional second query over the same stream (disabled). As written it uses the
# same 10-second window / 5-second slide as query1; change the slide duration
# here to compare different window settings side by side.
# windowedCounts2 = words \
#     .groupBy(window(words.timestamp, "10 seconds", "5 seconds")) \
#     .agg({"word": "avg"})

# # Output the new windowed averages to the console
# query2 = windowedCounts2 \
#     .writeStream \
#     .outputMode("complete") \
#     .format("console") \
#     .option("truncate", False) \
#     .start()

# Block until the stream terminates. The cleanup lives in `finally` so that an
# interrupted kernel (KeyboardInterrupt) or a failing query still stops the
# streaming query and the session instead of leaving them running.
try:
    query1.awaitTermination()
    # query2.awaitTermination()
finally:
    try:
        query1.stop()
    finally:
        # Stop the SparkSession even if stopping the query itself raised.
        spark.stop()


24/05/13 00:55:50 WARN TextSocketSourceProvider: The socket source should not be used for production applications! It does not support recovery.
24/05/13 00:55:50 WARN ResolveWriteToStream: Temporary checkpoint location created which is deleted normally when the query didn't fail: /tmp/temporary-72c9003b-46db-4e6b-9126-0c72186f975e. If it's required to delete it under any circumstances, please set spark.sql.streaming.forceDeleteTempCheckpointLocation to true. Important to know deleting temp checkpoint folder is best effort.
24/05/13 00:55:50 WARN ResolveWriteToStream: spark.sql.adaptive.enabled is not supported in streaming DataFrames/Datasets and will be disabled.
                                                                                

-------------------------------------------
Batch: 0
-------------------------------------------
+------+---------+
|window|avg(word)|
+------+---------+
+------+---------+



                                                                                

-------------------------------------------
Batch: 1
-------------------------------------------
+------------------------------------------+---------+
|window                                    |avg(word)|
+------------------------------------------+---------+
|{2024-05-13 00:55:50, 2024-05-13 00:56:00}|1.0      |
|{2024-05-13 00:55:55, 2024-05-13 00:56:05}|1.0      |
+------------------------------------------+---------+



                                                                                

-------------------------------------------
Batch: 2
-------------------------------------------
+------------------------------------------+---------+
|window                                    |avg(word)|
+------------------------------------------+---------+
|{2024-05-13 00:55:50, 2024-05-13 00:56:00}|2.5      |
|{2024-05-13 00:55:55, 2024-05-13 00:56:05}|2.5      |
+------------------------------------------+---------+



                                                                                

-------------------------------------------
Batch: 3
-------------------------------------------
+------------------------------------------+---------+
|window                                    |avg(word)|
+------------------------------------------+---------+
|{2024-05-13 00:56:10, 2024-05-13 00:56:20}|3.0      |
|{2024-05-13 00:56:05, 2024-05-13 00:56:15}|3.0      |
|{2024-05-13 00:55:50, 2024-05-13 00:56:00}|2.5      |
|{2024-05-13 00:55:55, 2024-05-13 00:56:05}|2.5      |
+------------------------------------------+---------+



                                                                                

-------------------------------------------
Batch: 4
-------------------------------------------
+------------------------------------------+---------+
|window                                    |avg(word)|
+------------------------------------------+---------+
|{2024-05-13 00:56:10, 2024-05-13 00:56:20}|3.5      |
|{2024-05-13 00:56:05, 2024-05-13 00:56:15}|3.5      |
|{2024-05-13 00:55:50, 2024-05-13 00:56:00}|2.5      |
|{2024-05-13 00:55:55, 2024-05-13 00:56:05}|2.5      |
+------------------------------------------+---------+



                                                                                

-------------------------------------------
Batch: 5
-------------------------------------------
+------------------------------------------+---------+
|window                                    |avg(word)|
+------------------------------------------+---------+
|{2024-05-13 00:56:10, 2024-05-13 00:56:20}|3.5      |
|{2024-05-13 00:56:20, 2024-05-13 00:56:30}|100.0    |
|{2024-05-13 00:56:05, 2024-05-13 00:56:15}|3.5      |
|{2024-05-13 00:56:15, 2024-05-13 00:56:25}|100.0    |
|{2024-05-13 00:55:50, 2024-05-13 00:56:00}|2.5      |
|{2024-05-13 00:55:55, 2024-05-13 00:56:05}|2.5      |
+------------------------------------------+---------+



                                                                                

-------------------------------------------
Batch: 6
-------------------------------------------
+------------------------------------------+---------+
|window                                    |avg(word)|
+------------------------------------------+---------+
|{2024-05-13 00:56:10, 2024-05-13 00:56:20}|3.5      |
|{2024-05-13 00:56:35, 2024-05-13 00:56:45}|200.0    |
|{2024-05-13 00:56:20, 2024-05-13 00:56:30}|100.0    |
|{2024-05-13 00:56:05, 2024-05-13 00:56:15}|3.5      |
|{2024-05-13 00:56:40, 2024-05-13 00:56:50}|200.0    |
|{2024-05-13 00:56:15, 2024-05-13 00:56:25}|100.0    |
|{2024-05-13 00:55:50, 2024-05-13 00:56:00}|2.5      |
|{2024-05-13 00:55:55, 2024-05-13 00:56:05}|2.5      |
+------------------------------------------+---------+



                                                                                

-------------------------------------------
Batch: 7
-------------------------------------------
+------------------------------------------+---------+
|window                                    |avg(word)|
+------------------------------------------+---------+
|{2024-05-13 00:56:10, 2024-05-13 00:56:20}|3.5      |
|{2024-05-13 00:56:35, 2024-05-13 00:56:45}|200.0    |
|{2024-05-13 00:56:50, 2024-05-13 00:57:00}|300.0    |
|{2024-05-13 00:56:45, 2024-05-13 00:56:55}|300.0    |
|{2024-05-13 00:56:20, 2024-05-13 00:56:30}|100.0    |
|{2024-05-13 00:56:05, 2024-05-13 00:56:15}|3.5      |
|{2024-05-13 00:56:40, 2024-05-13 00:56:50}|200.0    |
|{2024-05-13 00:56:15, 2024-05-13 00:56:25}|100.0    |
|{2024-05-13 00:55:50, 2024-05-13 00:56:00}|2.5      |
|{2024-05-13 00:55:55, 2024-05-13 00:56:05}|2.5      |
+------------------------------------------+---------+



                                                                                

-------------------------------------------
Batch: 8
-------------------------------------------
+------------------------------------------+---------+
|window                                    |avg(word)|
+------------------------------------------+---------+
|{2024-05-13 00:56:10, 2024-05-13 00:56:20}|3.5      |
|{2024-05-13 00:56:35, 2024-05-13 00:56:45}|200.0    |
|{2024-05-13 00:56:50, 2024-05-13 00:57:00}|350.0    |
|{2024-05-13 00:56:45, 2024-05-13 00:56:55}|350.0    |
|{2024-05-13 00:56:20, 2024-05-13 00:56:30}|100.0    |
|{2024-05-13 00:56:05, 2024-05-13 00:56:15}|3.5      |
|{2024-05-13 00:56:40, 2024-05-13 00:56:50}|200.0    |
|{2024-05-13 00:56:15, 2024-05-13 00:56:25}|100.0    |
|{2024-05-13 00:55:50, 2024-05-13 00:56:00}|2.5      |
|{2024-05-13 00:55:55, 2024-05-13 00:56:05}|2.5      |
+------------------------------------------+---------+

