# Streaming data from Cosmos DB to a Spark Delta table

In [1]:
// ADLS Gen2 (abfss) locations used by the streaming sink defined further down.
// Delta table output directory.
val deltaTablesPath: String = "abfss://xxx.dfs.core.windows.net/deltaTables"
// Structured Streaming checkpoint directory for the delta write.
val deltaCheckpointsPath: String = "abfss://xxx.dfs.core.windows.net/deltaCheckpoints"

// Streaming DataFrame over the Cosmos DB "Tenant" container, read through the
// "SynapseDb" linked service with the change-feed reader switched on.
// All connector settings are gathered in one map and applied in a single call.
val dfStream = {
    val changeFeedOptions = Map(
        "spark.synapse.linkedService" -> "SynapseDb",
        "spark.cosmos.container" -> "Tenant",
        "spark.cosmos.changeFeed.readEnabled" -> "true",
        "spark.cosmos.changeFeed.startFromTheBeginning" -> "true",
        "spark.cosmos.changeFeed.checkpointLocation" -> "/localReadCheckpointFolder",
        "spark.cosmos.changeFeed.queryName" -> "streamQuery"
    )
    spark.readStream.format("cosmos.oltp").options(changeFeedOptions).load()
}

// Project the change-feed documents down to the columns stored in the delta
// table, converting the time fields to proper timestamps.
val records = dfStream.
    select(
        // `_ts` holds epoch seconds. Cast it straight to a timestamp instead of
        // going through from_unixtime: that function renders a session-time-zone
        // string without an offset, so parsing it back is ambiguous for the
        // repeated hour at a DST fall-back and can shift rows by one hour.
        // The intermediate cast to long mirrors from_unixtime's own input
        // coercion, so the result is the same wherever the round trip was exact.
        // NOTE(review): Cosmos DB documents `_ts` as the last-modified time, so
        // "createTime" may be a misnomer; the column name is kept because the
        // sink table schema depends on it.
        $"_ts".cast("long").cast("timestamp").as("createTime"),
        $"id",
        $"aadTenantId",
        $"country",
        $"onboardTime".cast("timestamp"),
        $"offboardTime".cast("timestamp"),
        $"licenseStatus"
    ).
    // Declare createTime as the event-time column with a 10 minute lateness bound.
    withWatermark("createTime", "10 minutes")

// Append the projected records to the delta table, partitioned by country.
// The query handle is kept in a val so it can be referred to by name.
val tenantQuery = records.writeStream.
    format("delta").
    outputMode("append").
    partitionBy("country").
    option("checkpointLocation", deltaCheckpointsPath).
    start(deltaTablesPath)

// Block this cell until the streaming query stops or fails.
tenantQuery.awaitTermination()

StatementMeta(smallpool, 8, 1, Cancelled, Cancelling)