# ARM Acknowledgment 


**Autoloader set up**  
This notebook sets up an Autoloader job, run on a manual trigger, that collects acknowledgment (ack) messages from the ack Event Hub.


In [0]:
import json
import time
from pyspark.sql.types import StructType, StructField, StringType, IntegerType, TimestampType, LongType
from pyspark.sql.functions import col,from_json
import json

In [0]:

# Columns expected in the JSON payload of each ack message, as (name, Spark type).
_ACK_COLUMNS = (
    ("filename", StringType()),
    ("http_response", IntegerType()),
    ("timestamp", TimestampType()),
    ("http_message", StringType()),
)

# Every column is declared nullable (third StructField argument is True).
ack_schema = StructType(
    [StructField(column_name, column_type, True) for column_name, column_type in _ACK_COLUMNS]
)

In [0]:
# Load the environment configuration from DBFS; the file is read as multi-line JSON.
config = spark.read.option("multiline", "true").json("dbfs:/configs/config.json")

# Fetch the first row once: each DataFrame.first() call runs its own Spark job,
# so both settings are read from the same Row instead of calling first() twice.
config_row = config.first()
if config_row is None:
    # first() returns None for an empty DataFrame; fail with a clear message
    # instead of a TypeError from subscripting None.
    raise ValueError("dbfs:/configs/config.json contains no configuration rows")

# Trim and lower-case both values before they are used to build resource names.
env = config_row["env"].strip().lower()
lz_key = config_row["lz_key"].strip().lower()

In [0]:
# Secret scope name, built from the landing-zone key and the environment.
keyvault_name = f"ingest{lz_key}-meta002-{env}"

# Secret keys for the service principal, in the order they are unpacked below.
# NOTE: the keys are spelled "PRINCIPLE"; keep them exactly as written
# (presumably they match the stored secret names — confirm before renaming).
_SP_SECRET_KEYS = (
    'SERVICE-PRINCIPLE-CLIENT-SECRET',
    'SERVICE-PRINCIPLE-TENANT-ID',
    'SERVICE-PRINCIPLE-CLIENT-ID',
)

# Fetch the three service principal secrets from the scope.
client_secret, tenant_id, client_id = [
    dbutils.secrets.get(scope=keyvault_name, key=secret_key)
    for secret_key in _SP_SECRET_KEYS
]

In [0]:
# Event Hubs namespace (derived from the landing-zone key and environment) and
# Event Hub name (derived from the landing-zone key only). They supply the Kafka
# bootstrap server host and the subscribed topic in KAFKA_OPTIONS.
EH_NAMESPACE = f"ingest{lz_key}-integration-eventHubNamespace001-{env}"
EH_NAME = f"evh-sbl-ack-{lz_key}-uks-dlrm-01"

In [0]:
## Event Hub connection details used by the streaming reader

# Connection string for the Event Hubs namespace, read from the same secret scope
# as the service principal credentials. Keyword arguments are used to match the
# other dbutils.secrets.get calls in this notebook.
# NOTE(review): "RootManageSharedAccessKey" looks like the namespace root policy;
# a listen-only policy would presumably be enough for a reader — confirm.
connection_string = dbutils.secrets.get(scope=keyvault_name, key="RootManageSharedAccessKey")

In [0]:
# Host and port of the Event Hubs namespace endpoint the Kafka reader connects to.
_bootstrap_servers = f"{EH_NAMESPACE}.servicebus.windows.net:9093"

# SASL/PLAIN login module configuration: the user name is the literal
# "$ConnectionString" and the password is the connection string read from secrets.
_jaas_config = f'kafkashaded.org.apache.kafka.common.security.plain.PlainLoginModule required username="$ConnectionString" password="{connection_string}";'

# Options handed to the "kafka" streaming source.
KAFKA_OPTIONS = {
    "kafka.bootstrap.servers": _bootstrap_servers,
    "subscribe": EH_NAME,  # topic name = Event Hub name
    "startingOffsets": "earliest",
    "kafka.security.protocol": "SASL_SSL",
    "failOnDataLoss": "false",
    "kafka.sasl.mechanism": "PLAIN",
    "kafka.sasl.jaas.config": _jaas_config,
}

In [0]:
# Storage account names
curated_storage = f"ingest{lz_key}curated{env}"
checkpoint_storage = f"ingest{lz_key}xcutting{env}"


def _configure_oauth(storage_account):
    """Register service principal OAuth settings for one storage account.

    Sets the five ``fs.azure.account.*`` Spark session options for
    ``<storage_account>.dfs.core.windows.net`` from the notebook-level
    ``client_id``, ``client_secret`` and ``tenant_id`` values.

    Args:
        storage_account: Storage account name, without the DNS suffix.
    """
    host = f"{storage_account}.dfs.core.windows.net"
    spark.conf.set(f"fs.azure.account.auth.type.{host}", "OAuth")
    spark.conf.set(f"fs.azure.account.oauth.provider.type.{host}", "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider")
    spark.conf.set(f"fs.azure.account.oauth2.client.id.{host}", client_id)
    spark.conf.set(f"fs.azure.account.oauth2.client.secret.{host}", client_secret)
    spark.conf.set(f"fs.azure.account.oauth2.client.endpoint.{host}", f"https://login.microsoftonline.com/{tenant_id}/oauth2/token")


# Apply the same settings to both accounts, in the original order:
# curated storage (Delta table) first, then checkpoint storage.
for _storage_account in (curated_storage, checkpoint_storage):
    _configure_oauth(_storage_account)

In [0]:
# Container and path for storing the Delta table (in the curated storage account).
# The account name comes from `curated_storage`, so the path always targets the
# same account the OAuth settings were registered for.
data_path = f"abfss://silver@{curated_storage}.dfs.core.windows.net/ARIADM/ARM/AUDIT/SBAILS/sbails_ack_audit"

# Container and path for the streaming checkpoint (in the cross-cutting storage
# account), built from `checkpoint_storage` for the same reason.
checkpoint_path = f"abfss://db-ack-checkpoint@{checkpoint_storage}.dfs.core.windows.net/SBAILS/ACK/ack"


In [0]:
# Streaming reader for the "kafka" source, configured entirely by KAFKA_OPTIONS.
kafka_reader = spark.readStream.format("kafka").options(**KAFKA_OPTIONS)

# Streaming DataFrame of the messages read from the Event Hub.
eventhubdf = kafka_reader.load()

In [0]:
# The message payload is in the `value` column; cast it to a string
# (assuming UTF-8) so it can be parsed as JSON.
_payload_text = col("value").cast("string").alias("json_str")

# Parse the JSON text against ack_schema into a single struct column.
_ack_struct = from_json(col("json_str"), ack_schema).alias("json_obj")

# Flatten the struct so each schema field becomes a top-level column.
parsed_df = eventhubdf.select(_payload_text).select(_ack_struct).select("json_obj.*")

In [0]:
# dbutils.fs.ls("/mnt/ingest00curatedsboxsilver/ARIADM/ARM/AUDIT/SBAILS")

In [0]:
# Append the parsed ack messages to the Delta table, then stop.
#
# The availableNow trigger processes everything available when the query starts
# and then terminates by itself. It replaces the earlier fixed 30-second sleep
# followed by stop(), which cut the stream off regardless of how much data was
# left and never surfaced a failed query.
# NOTE: availableNow needs Spark 3.3+; on older runtimes use .trigger(once=True).
query = (
    parsed_df.writeStream
    .format("delta")
    .option("checkpointLocation", checkpoint_path)
    .outputMode("append")
    .trigger(availableNow=True)
    .start(data_path)
)

# Block until the run has finished. If the query failed, this raises, so the
# notebook does not go on to report success.
query.awaitTermination()

In [0]:
# Batch-read the Delta table at data_path and render it for a visual check.
delta_reader = spark.read.format("delta")
df = delta_reader.load(data_path)
display(df)

In [0]:
# End the notebook run, passing this message to dbutils.notebook.exit as the exit value.
dbutils.notebook.exit("Notebook completed successfully")