In [0]:
from pyspark.sql.types import StructType, StringType, IntegerType

# Column layout for the order records: four string fields and an integer quantity.
schema = (
    StructType()
    .add("order_id", StringType())
    .add("customer_id", StringType())
    .add("product", StringType())
    .add("quantity", IntegerType())
    .add("region", StringType())
)

# Three sample orders, one tuple per row, in the same column order as `schema`.
initial_data = [
    ("1", "C101", "Laptop", 2, "South"),
    ("2", "C102", "Chair", 6, "North"),
    ("3", "C103", "Mobile", 1, "East"),
]

df = spark.createDataFrame(data=initial_data, schema=schema)

# Replace whatever is already at the target path with CSV files that carry a
# header row.
df.write.csv("dbfs:/tmp/stream/orders", mode="overwrite", header=True)

In [0]:
# Streaming reader over the orders CSV directory. The schema is supplied
# explicitly, and the first line of each file is treated as a header.
orders_stream = spark.readStream.csv(
    "dbfs:/tmp/stream/orders",
    schema=schema,
    header=True,
)

In [0]:
from pyspark.sql.functions import when

# Add a boolean `bulk_order` flag: True when quantity exceeds 5, False for
# every other row (including rows where the comparison does not evaluate to true).
transformed_orders = orders_stream.withColumn(
    "bulk_order",
    when(orders_stream["quantity"] > 5, True).otherwise(False),
)

In [0]:
from pyspark.sql.functions import col

# Synthetic streaming source that generates one row per second.
rate_df = (
    spark.readStream
    .format("rate")
    .option("rowsPerSecond", 1)
    .load()
)

# Tag each generated row with whether its `value` column is even.
transformed_df = rate_df.withColumn("is_even", (col("value") % 2 == 0))

# Re-running this cell while the query from a previous run is still active
# would fail, because Spark rejects a second active query with the same name.
# Stop any leftover "rate_table" query first so the cell can be re-executed.
for active_query in spark.streams.active:
    if active_query.name == "rate_table":
        active_query.stop()

# Write to the in-memory sink; the query name doubles as the name of the
# temporary table that can be queried with Spark SQL afterwards.
query = (
    transformed_df.writeStream
    .format("memory")
    .queryName("rate_table")
    .outputMode("append")
    .start()
)


In [0]:
import time

# The streaming query is started asynchronously, so when the notebook is run
# top to bottom this cell can execute before the first micro-batch has been
# committed to the memory sink, and the table would come back empty.
# Poll (bounded to 30 seconds) until at least one row is available; stop
# waiting early if the query is no longer running.
wait_deadline = time.monotonic() + 30
while (
    query.isActive
    and not spark.table("rate_table").head(1)
    and time.monotonic() < wait_deadline
):
    time.sleep(0.5)

spark.sql("SELECT * FROM rate_table").show()

+--------------------+-----+-------+
|           timestamp|value|is_even|
+--------------------+-----+-------+
|2025-08-08 11:10:...|    0|   true|
|2025-08-08 11:10:...|    1|  false|
|2025-08-08 11:10:...|    2|   true|
|2025-08-08 11:10:...|    3|  false|
|2025-08-08 11:10:...|    4|   true|
|2025-08-08 11:10:...|    5|  false|
|2025-08-08 11:10:...|    6|   true|
|2025-08-08 11:10:...|    7|  false|
|2025-08-08 11:10:...|    8|   true|
|2025-08-08 11:10:...|    9|  false|
|2025-08-08 11:10:...|   10|   true|
|2025-08-08 11:10:...|   11|  false|
|2025-08-08 11:10:...|   12|   true|
|2025-08-08 11:10:...|   13|  false|
|2025-08-08 11:10:...|   14|   true|
|2025-08-08 11:10:...|   15|  false|
|2025-08-08 11:10:...|   16|   true|
|2025-08-08 11:10:...|   17|  false|
|2025-08-08 11:10:...|   18|   true|
|2025-08-08 11:10:...|   19|  false|
+--------------------+-----+-------+
only showing top 20 rows
