In [0]:
# DDL for the Iceberg-backed test table. IF NOT EXISTS makes the statement
# a no-op when the table is already present, so the cell can be re-run.
test_table_ddl = """
    CREATE TABLE IF NOT EXISTS `na-dbxtraining`.biju_bronze.test_streaming_table (
        id INT,
        name STRING,
        value STRING,
        ingestion_timestamp TIMESTAMP
    )
    USING iceberg
    COMMENT 'Simple test streaming table using Iceberg'
"""
spark.sql(test_table_ddl)

In [0]:
%sql
-- Show every row and column currently in the test streaming table.
SELECT *
FROM `na-dbxtraining`.biju_bronze.test_streaming_table

In [0]:
from pyspark.sql.functions import col, current_timestamp
from pyspark.sql.types import StructType, StructField, StringType, IntegerType




# Explicit schema applied to the incoming JSON test files.
test_schema = (
    StructType()
    .add("id", IntegerType(), False)
    .add("name", StringType(), True)
    .add("value", StringType(), True)
)

# Make sure the Iceberg target table exists before the stream writes to it.
# IF NOT EXISTS makes this a no-op when the table is already there.
spark.sql("""
    CREATE TABLE IF NOT EXISTS `na-dbxtraining`.biju_bronze.test_streaming_table (
        id INT,
        name STRING,
        value STRING,
        ingestion_timestamp TIMESTAMP
    )
    USING iceberg
    COMMENT 'Simple test streaming table using Iceberg'
""")

# Stage 1 - source: read JSON files from the volume through the `cloudFiles`
# streaming source, parsed with the fixed schema above.
test_source_df = (
    spark.readStream
        .format("cloudFiles")
        .schema(test_schema)
        .option("cloudFiles.format", "json")
        .option("cloudFiles.schemaLocation", "/Volumes/na-dbxtraining/biju_raw/biju_vol/test/schema")
        .load("/Volumes/na-dbxtraining/biju_raw/biju_vol/test/data/")
)

# Stage 2 - projection: keep the three payload columns and add an ingestion timestamp.
payload_columns = [col(column_name) for column_name in ("id", "name", "value")]
test_stamped_df = test_source_df.select(
    *payload_columns,
    current_timestamp().alias("ingestion_timestamp"),
)

# Stage 3 - sink: append to the Iceberg table, triggering a micro-batch every 10 seconds.
test_writer = (
    test_stamped_df.writeStream
        .format("iceberg")
        .outputMode("append")
        .option("checkpointLocation", "/Volumes/na-dbxtraining/biju_raw/biju_vol/test/checkpoint")
        .trigger(processingTime="10 seconds")
)

# toTable() starts the query and returns its handle.
test_stream = test_writer.toTable("`na-dbxtraining`.biju_bronze.test_streaming_table")

print(f"Stream started: {test_stream.id}")

In [0]:
%sql
-- Query the streaming target table; this cell sits after the cell that starts the stream.
SELECT *
FROM `na-dbxtraining`.biju_bronze.test_streaming_table

In [0]:
%sql
-- Creates the Iceberg table bronze_meter_readingsiceberg when it does not exist yet.
-- Run this cell before any cell that reads from or writes to the table.
CREATE TABLE IF NOT EXISTS `na-dbxtraining`.biju_bronze.bronze_meter_readingsiceberg (
    customer_id         STRING,
    kwh_reading         DOUBLE,
    metername           STRING,
    raw_source          STRING,
    reading_timestamp   TIMESTAMP,
    ingestion_timestamp TIMESTAMP
)
USING ICEBERG;

In [0]:
%sql
-- Show every row and column currently in the bronze meter readings Iceberg table.
SELECT *
FROM `na-dbxtraining`.biju_bronze.bronze_meter_readingsiceberg