In [0]:
# Databricks secret scope that holds every credential/setting used by this pipeline.
SCOPE = "dh-keyvault"

# Resolve all pipeline settings from the secret scope in one pass. The names on the
# left line up positionally with the secret keys on the right:
#   ADLS_PATH            <- "dlt-adls-path"
#   COSMOS_DB_ENDPOINT   <- "cosmos-db-endpoint"
#   COSMOS_DB_MASTER_KEY <- "cosmos-db-access-key"
#   COSMOS_DB_DATABASE   <- "cosmos-db-database"
#   COSMOS_DB_CONTAINER  <- "cosmos-db-container"
(
    ADLS_PATH,
    COSMOS_DB_ENDPOINT,
    COSMOS_DB_MASTER_KEY,
    COSMOS_DB_DATABASE,
    COSMOS_DB_CONTAINER,
) = (
    dbutils.secrets.get(scope=SCOPE, key=secret_key)
    for secret_key in (
        "dlt-adls-path",
        "cosmos-db-endpoint",
        "cosmos-db-access-key",
        "cosmos-db-database",
        "cosmos-db-container",
    )
)

In [0]:
from pyspark.sql import functions as F
from pyspark.sql import types as T
import dlt


# 1. Define a function to read from Cosmos DB Change Feed
@dlt.table(
    comment="This table reads change feed data from Cosmos DB in real time."
)
def cosmosdb_raw_data():
    """Open a streaming read of the configured Cosmos DB container's change feed.

    Connection details (endpoint, key, database, container) come from the
    module-level constants loaded from the secret scope.

    Returns:
        The streaming DataFrame produced by the ``cosmos.oltp.changefeed`` source.
    """
    connector_options = {
        "spark.cosmos.accountEndpoint": COSMOS_DB_ENDPOINT,
        "spark.cosmos.accountKey": COSMOS_DB_MASTER_KEY,
        "spark.cosmos.database": COSMOS_DB_DATABASE,
        "spark.cosmos.container": COSMOS_DB_CONTAINER,
        # Change feed mode; the alternative to Incremental is FullFidelity.
        "spark.cosmos.changefeed.mode": "Incremental",
    }
    change_feed_reader = spark.readStream.format("cosmos.oltp.changefeed")
    return change_feed_reader.options(**connector_options).load()

# 2. Transform and Clean Data (Optional)
@dlt.table(
    comment="Cleaned and transformed data from Cosmos DB Change Feed."
)
def cleaned_data():
    """Stream rows from ``cosmosdb_raw_data`` unchanged.

    No cleaning or transformation is applied yet; this table is the place to
    add it.

    Returns:
        The streaming DataFrame read from the ``cosmosdb_raw_data`` table.
    """
    return dlt.read_stream("cosmosdb_raw_data")

# 3. Write the data to ADLS
@dlt.table(
    comment="Store cleaned data into ADLS Delta Table.",
    path=ADLS_PATH,
    # `delta.checkpointInterval` is the Delta table property that controls how
    # often a checkpoint is written, expressed as an integer number of commits
    # (not a duration). The previous key `delta.format.checkpointInterval` with
    # the value "5 minutes" is not a recognized Delta property.
    # NOTE(review): "5" (commits) is a best guess at the original intent — confirm.
    table_properties={"delta.checkpointInterval": "5"},
)
def adls_delta_table():
    """Persist the contents of ``cleaned_data`` as a Delta table at ``ADLS_PATH``.

    Returns:
        The DataFrame read from the ``cleaned_data`` table.
    """
    # NOTE(review): `dlt.read` performs a complete (non-streaming) read of
    # `cleaned_data`; if incremental append semantics are wanted here,
    # `dlt.read_stream` may be the intended call — confirm before changing.
    return dlt.read("cleaned_data")
