In [0]:
from pyspark.sql.functions import col, expr
from datetime import datetime, timedelta
import random

# Generate 100 synthetic time-series records of the form
# (id, ISO-8601 event time within the last 60 minutes, integer value in 1..100).
# A dedicated, seeded generator keeps the minute offsets and values
# reproducible across reruns without touching the global `random` state.
# The absolute timestamps still follow the wall clock through `now`.
rng = random.Random(42)
now = datetime.now()
data = [
    (
        i,
        (now - timedelta(minutes=rng.randint(0, 60))).isoformat(),
        rng.randint(1, 100),
    )
    for i in range(100)
]

# Create DataFrame. event_time is built as an ISO string above, so it is cast
# to a timestamp column here.
df = spark.createDataFrame(data, ["id", "event_time", "value"])
df = df.withColumn("event_time", col("event_time").cast("timestamp"))

# Write to a table that serves as the source for the streaming simulation.
# NOTE(review): "overwrite" rewrites the table on every run; a stream that
# already holds a checkpoint against this table may reject the rewrite commit
# on its next run -- confirm against the Delta streaming-source documentation.
df.write.mode("overwrite").saveAsTable("bijucatalog.bijusilverschema.streaming_data")


In [0]:
%sql
-- Preview the generated source rows.
SELECT *
FROM bijucatalog.bijusilverschema.streaming_data

In [0]:
from pyspark.sql.functions import window
source_path = "s3://databricksbijubucketnew/raw/"
archive_base = "s3://databricksbijubucketnew/archive/"
checkpoint = "s3://databricksbijubucketnew/checkpoints/"

# `checkpoint` already ends with "/", so strip it before appending the
# sub-directory; otherwise the location becomes ".../checkpoints//streaming".
streaming_checkpoint = f"{checkpoint.rstrip('/')}/streaming"

# Read the table as a stream and declare a 10-minute watermark on event_time
# so late-arriving rows are bounded for the windowed aggregation below.
stream_df = (
    spark.readStream.format("delta")
    .table("bijucatalog.bijusilverschema.streaming_data")
    .withWatermark("event_time", "10 minutes")
)

# Count events per 15-minute window of event_time.
aggregated = stream_df.groupBy(
    window("event_time", "15 minutes")
).count()

# Write the windowed counts to a Delta table.
# In append mode a window is emitted only after the watermark has passed the
# end of that window, so the most recent window(s) may be missing from the
# output of a single availableNow run.
query = (
    aggregated.writeStream
    .outputMode("append")
    .format("delta")
    .option("checkpointLocation", streaming_checkpoint)
    .trigger(availableNow=True)
    .toTable("bijucatalog.bijusilverschema.event_counts")  # replace with your catalog.schema.table
)

# toTable() starts the query in the background and returns immediately.
# Block until the availableNow run has processed all available data and
# stopped, so that cells reading event_counts see the finished result.
query.awaitTermination()

In [0]:
%sql
-- Inspect the per-window counts written by the streaming query.
SELECT *
FROM bijucatalog.bijusilverschema.event_counts