In [0]:
%load_ext autoreload
%autoreload 2
# Enables autoreload. Learn more at https://docs.databricks.com/en/files/workspace-modules.html#autoreload-for-python-modules
# To disable autoreload, run %autoreload 0

In [0]:
import sys
import os
sys.path.append(os.path.abspath('./odibi_de_v2'))
os.environ["PYTHONDONTWRITEBYTECODE"] = "1"
from odibi_de_v2.connector import AzureBlobConnector
from odibi_de_v2.ingestion import SparkStreamingDataReader
from odibi_de_v2.storage import SparkStreamingDataSaver
from odibi_de_v2.core import DataType
from odibi_de_v2.core import Framework
from pyspark.sql import SparkSession


In [0]:
def set_qat_azure_storage_account():
    """Configure Spark for the QAT Azure storage account and build a connector.

    Reads the storage account name and key from the ``GOATKeyVault`` secret
    scope, registers the key on the Spark session configuration under
    ``fs.azure.account.key.<account>.dfs.core.windows.net``, and creates an
    ``AzureBlobConnector`` with the same credentials.

    Returns:
        tuple: ``(spark, connector)`` -- the session returned by
        ``SparkSession.builder...getOrCreate()`` and the ``AzureBlobConnector``
        built from the retrieved account name and key.
    """
    session = SparkSession.builder.appName("qat_azure_storage_account").getOrCreate()

    # `dbutils` is not imported in this notebook; presumably it is the global
    # injected by the Databricks runtime -- confirm when running elsewhere.
    secret_scope = "GOATKeyVault"
    account_name = dbutils.secrets.get(secret_scope, "GoatBlobStorageName")
    account_key = dbutils.secrets.get(secret_scope, "GoatBlobStorageKey")

    # Register the account key on the session so Spark can authenticate
    # against the account's dfs endpoint.
    account_key_setting = f"fs.azure.account.key.{account_name}.dfs.core.windows.net"
    session.conf.set(account_key_setting, account_key)

    blob_connector = AzureBlobConnector(
        account_name=account_name,
        account_key=account_key,
    )
    return session, blob_connector

# Bind the configured session and connector as notebook-level names; the
# reader/saver cells below use `spark` and `connector` directly.
spark, connector = set_qat_azure_storage_account()

In [0]:
# Resolve the source folder and the schema-tracking folder inside the
# "digital-manufacturing" container.
file_path = connector.get_file_path(
    "digital-manufacturing", "raw_Energy_Cedar_Rapids/Boilers", Framework.SPARK
)
schema_file_path = connector.get_file_path(
    "digital-manufacturing", "raw_Energy_Cedar_Rapids/schema", Framework.SPARK
)

# Open a streaming read over the Avro source.
# NOTE(review): the `cloudFiles.` option prefix suggests the reader is backed
# by Databricks Auto Loader -- confirm against SparkStreamingDataReader.
reader = SparkStreamingDataReader()
df = reader.read_data(
    data_type=DataType.AVRO,
    file_path=file_path,
    spark=spark,
    options={"cloudFiles.schemaLocation": schema_file_path},
)

In [0]:
# Destination folder for the streamed output.
saver_file_path = connector.get_file_path(
    "digital-manufacturing", "odibi_de_stream_saver/test", Framework.SPARK
)

# Folder passed to the saver as the stream's checkpoint location.
# NOTE(review): "checkpointa" may be a typo or a deliberately fresh checkpoint
# folder -- confirm before renaming, since changing it points the stream at a
# different checkpoint.
checkpoint_file_path = connector.get_file_path(
    "digital-manufacturing", "odibi_de_stream_saver/checkpointa", Framework.SPARK
)

# Write the stream from the reader cell (`df`) out as Delta.
# NOTE(review): presumably `trigger` is forwarded to the Structured Streaming
# writer, where availableNow processes the data currently available and then
# stops -- confirm against SparkStreamingDataSaver.
saver = SparkStreamingDataSaver()
saver.save_data(
    df=df,
    data_type=DataType.DELTA,
    file_path=saver_file_path,
    options={"checkpointLocation": checkpoint_file_path},
    trigger={"availableNow": True},
)
