In [0]:
import os

# Connection settings for the Kinesis source used by the cells below.
# Each value is taken from an environment variable when one is set, so real
# credentials never have to be typed into (and saved with) the notebook.
# When the variable is absent the placeholder string is kept and must be
# replaced before the stream can be read.
awsAccessKeyId = os.environ.get("AWS_ACCESS_KEY_ID", "Your Access Key ID")
awsSecretKey = os.environ.get("AWS_SECRET_ACCESS_KEY", "Your Secret Access Key")
kinesisStreamName = os.environ.get("KINESIS_STREAM_NAME", "Your Kinesis Stream Name")
kinesisRegion = os.environ.get("KINESIS_REGION", "Your Kinesis Region")

In [0]:
from pyspark.sql.types import StructType, StructField, StringType
from pyspark.sql.functions import col, from_json 

# Schema of the JSON payload carried by each Kinesis record.
# Every field, including "timestamp", is read as a plain string; no type
# conversion is applied at this stage.
schema = StructType([
  StructField("post_id", StringType()),
  StructField("platform", StringType()),
  StructField("engagement_type", StringType()),
  StructField("user_id", StringType()),
  StructField("timestamp", StringType())
])

# Streaming DataFrame with one column per payload field
# (post_id, platform, engagement_type, user_id, timestamp).
streaming_raw_df = (
  spark
    .readStream
    .format("kinesis")
    .option("streamName", kinesisStreamName)
    # "trim_horizon" starts from the oldest record still retained by the stream.
    .option("initialPosition", "trim_horizon")
    .option("awsAccessKey", awsAccessKeyId)
    .option("awsSecretKey", awsSecretKey)
    .option("region", kinesisRegion)
  .load()
  # The record payload is in the `data` column; cast it to a string and parse
  # it into a struct using the schema above.
  .withColumn("value", from_json(col("data").cast("string"), schema))
  # Keep only the parsed payload fields. The partition key is not part of the
  # output, so no intermediate column is derived from it.
  .select("value.post_id", "value.platform", "value.engagement_type", "value.user_id", "value.timestamp")
)

In [0]:
# Start the streaming write into the bronze Delta table.
# `window_df` receives the handle returned by `toTable()` for the started query.
window_df = (
  streaming_raw_df.writeStream
    # Name under which the query is registered.
    .queryName("social_media_streaming_engagement")
    # Progress is tracked under this checkpoint directory.
    .option("checkpointLocation", "/Volumes/workspace/default/checkpoint/kinesis_stream")
    # Process the data that is currently available.
    .trigger(availableNow=True)
    # New rows are appended to the target table.
    .outputMode("append")
    .toTable("workspace.default.social_media_engagement_streaming_bronze")
)

In [0]:
# Show the current status of the streaming write.
# The query handle returned by `toTable()` is bound to `window_df`;
# "social_media_streaming_engagement" is only the query's name, not a Python
# variable, so referencing it directly would raise NameError.
window_df.status