In [0]:
# from azure.storage.blob import BlobServiceClient

In [0]:
# sas_token = dbutils.secrets.get(scope="ingest00-meta002-sbox",key="ARIAB-SAS-TOKEN")
# storage_account_name = "a360c2x2555dz"
# container_name = "dropzone"
# sub_dir = "ARIAB/submission"

# spark.conf.set(
#     f"fs.azure.account.key.{storage_account_name}.blob.core.windows.net",
#     "{sas_token}"
# )


In [0]:
from pyspark.sql.types import StructType, StructField, StringType, IntegerType, TimestampType, LongType
from pyspark.sql.functions import *


# Explicit schema applied to the JSON response files read by Auto Loader.
# Every field is declared nullable; a single schema carries the columns
# selected later for the create_record, upload_new_file and input_upload
# operations.
_response_fields = [
    ("operation", StringType()),
    ("transaction_id", StringType()),
    ("relation_id", StringType()),
    ("a360_record_id", StringType()),
    ("process_time", TimestampType()),
    ("status", IntegerType()),
    ("input", StringType()),  # nested JSON kept as a raw string
    ("exception_description", StringType()),
    ("error_status", StringType()),
    ("a360_file_id", StringType()),
    ("file_size", LongType()),
    ("s_md5", StringType()),
    ("s_sha256", StringType()),
    ("timestamp", TimestampType()),  # may be used as process_time
    ("filename", StringType()),
    ("submission_folder", StringType()),
    ("file_hash", StringType()),
]

expected_schema = StructType(
    [StructField(field_name, field_type, True) for field_name, field_type in _response_fields]
)
 

In [0]:
#### Set up Auto Loader job

# --- Source: response folder in the drop zone container -------------------
storage_account_name = "a360c2x2555dz"
container_name = "dropzone"
sub_dir = "ARIAB/response"

input_path = f"wasbs://{container_name}@{storage_account_name}.blob.core.windows.net/{sub_dir}"

# Register the SAS token (read from the secret scope) for the source container
# so that the wasbs:// input path can be accessed.
sas_token = dbutils.secrets.get(scope="ingest00-meta002-sbox", key="ARIAB-SAS-TOKEN")
spark.conf.set(
    f"fs.azure.sas.{container_name}.{storage_account_name}.blob.core.windows.net",
    sas_token,
)

# Location passed to Auto Loader as cloudFiles.schemaLocation.
schema_location = "/mnt/autoLoaderSchema/ARMAPPEALS/response/read_stream"

# Define a file regex that matches files ending with .rsp
file_regex = ".*\\.rsp$"

# --- Sink: one Delta location per output table ----------------------------
output_container_name = "silver"
output_storage_account_name = "ingest00curatedsbox"
_output_root = f"wasbs://{output_container_name}@{output_storage_account_name}.blob.core.windows.net"

# All responses, unfiltered.
output_subdir_amalgamated_responses = "ARIADM/ARM/response/APPEALS/amalgamated_responses"
amalgamated_responses_path = f"{_output_root}/{output_subdir_amalgamated_responses}"

# Responses for the input_upload operation.
output_subdir_input_upload = "ARIADM/ARM/response/APPEALS/input_upload"
input_upload_responses_path = f"{_output_root}/{output_subdir_input_upload}"

# Responses for the create_record operation.
output_subdir_create_record_upload = "ARIADM/ARM/response/APPEALS/create_record"
create_record_responses_path = f"{_output_root}/{output_subdir_create_record_upload}"

# Responses for the upload_new_file operation.
output_subdir_upload_file_upload = "ARIADM/ARM/response/APPEALS/upload_file"
upload_file_responses_path = f"{_output_root}/{output_subdir_upload_file_upload}"

In [0]:
### Run Autoloader Read Stream
# Streaming DataFrame over the JSON response files under `input_path`, read
# with Auto Loader ("cloudFiles") using the fixed `expected_schema`
# (schema evolution disabled). Two file-metadata columns and a generated `id`
# column are appended to every row.
#
# NOTE(review): uuid() is non-deterministic and `df` is consumed by several
# independent write streams, so the same input record presumably receives a
# different `id` in each output table - confirm whether ids are expected to
# match across tables.
df = (
    spark.readStream
    .format("cloudFiles")
    .schema(expected_schema)
    .option("cloudFiles.format", "json")
    .option("cloudFiles.schemaLocation", schema_location)  # Schema tracking
    .option("cloudFiles.schemaEvolutionMode", "none")
    .option("multiline", "true")
    # Only ingest response files. The setup cell declares `file_regex` for
    # names ending in .rsp but it was never applied to the stream;
    # pathGlobFilter expects a glob rather than a regex, hence "*.rsp".
    .option("pathGlobFilter", "*.rsp")
    .load(input_path)
    .select(
        "*",
        col("_metadata.file_path").alias("_file_path"),
        col("_metadata.file_modification_time").alias("_file_modification_time"),
    )
    .withColumn("id", expr("uuid()"))  # one generated UUID per row
)

# display(df)



In [0]:
### 4 write streams to Silver container in blob storage

# Register the SAS token for the output container before any stream writes to it.
output_sas = dbutils.secrets.get(scope="ingest00-meta002-sbox", key="CURATED-SAS-TOKEN")
spark.conf.set(
    f"fs.azure.sas.{output_container_name}.{output_storage_account_name}.blob.core.windows.net",
    output_sas,
)

### first save the complete table
# Append every response row (no filtering) to the amalgamated Delta location.
# availableNow processes what is currently available and then stops, so the
# awaitTermination() call below blocks until this run has finished.
df_amalgamated = (
    df.writeStream
    .format("delta")
    .outputMode("append")
    .option("checkpointLocation", "/mnt/autoLoaderSchema/ARMAPPEALS/response/amalgamated")
    .trigger(availableNow=True)
    .start(amalgamated_responses_path)
)
df_amalgamated.awaitTermination()

In [0]:
# Read the amalgamated Delta table back as a batch DataFrame and show it.
df_amalgamated_responses = spark.read.load(amalgamated_responses_path, format="delta")
display(df_amalgamated_responses)

In [0]:
### Create Record table
# Columns kept for rows whose operation is "create_record".
_create_record_columns = [
    "id",
    "operation",
    "transaction_id",
    "relation_id",
    "a360_record_id",
    "process_time",
    "status",
    "input",
    "exception_description",
    "error_status",
    "_file_path",
    "_file_modification_time",
]

# Append the create_record rows to their own Delta location, using a
# checkpoint dedicated to this stream, and wait for the run to finish.
df_create_upload = (
    df.where(col("operation").isin(["create_record"]))
    .select(*_create_record_columns)
    .writeStream
    .format("delta")
    .outputMode("append")
    .option("checkpointLocation", "/mnt/autoLoaderSchema/ARMAPPEALS/response/create_record")
    .trigger(availableNow=True)
    .start(create_record_responses_path)
)

df_create_upload.awaitTermination()

In [0]:
# Read the create_record Delta table back as a batch DataFrame and show it.
df_create_record_responses = spark.read.load(create_record_responses_path, format="delta")
display(df_create_record_responses)

In [0]:
### upload file table
# Columns kept for rows whose operation is "upload_new_file".
_upload_file_columns = [
    "id",
    "operation",
    "transaction_id",
    "relation_id",
    "a360_record_id",
    "process_time",
    "status",
    "input",
    "exception_description",
    "error_status",
    "a360_file_id",
    "file_size",
    "s_md5",
    "s_sha256",
    "_file_path",
    "_file_modification_time",
]

# Append the upload_new_file rows to their own Delta location, using a
# checkpoint dedicated to this stream, and wait for the run to finish.
df_upload = (
    df.where(col("operation").isin(["upload_new_file"]))
    .select(*_upload_file_columns)
    .writeStream
    .format("delta")
    .outputMode("append")
    .option("checkpointLocation", "/mnt/autoLoaderSchema/ARMAPPEALS/response/upload_file")
    .trigger(availableNow=True)
    .start(upload_file_responses_path)
)
df_upload.awaitTermination()

In [0]:
# Read the upload_file Delta table back as a batch DataFrame and show it.
# This cell previously reused the name `df_create_record_responses`, which
# silently replaced the create_record frame with upload_file data; it now has
# its own name so each read-back variable matches the table it holds.
df_upload_file_responses = spark.read.format("delta").load(upload_file_responses_path)
display(df_upload_file_responses)

In [0]:
### Input upload table

# Columns kept for rows whose operation is "input_upload".
_input_upload_columns = [
    "id",
    "operation",
    "timestamp",
    "status",
    "exception_description",
    "error_status",
    "filename",
    "submission_folder",
    "file_hash",
    "_file_path",
    "_file_modification_time",
]

# Append the input_upload rows to their own Delta location, using a
# checkpoint dedicated to this stream, and wait for the run to finish.
df_input_upload = (
    df.where(col("operation").isin(["input_upload"]))
    .select(*_input_upload_columns)
    .writeStream
    .format("delta")
    .outputMode("append")
    .option("checkpointLocation", "/mnt/autoLoaderSchema/ARMAPPEALS/response/input_upload")
    .trigger(availableNow=True)
    .start(input_upload_responses_path)
)

df_input_upload.awaitTermination()

In [0]:
# Read the input_upload Delta table back as a batch DataFrame and show it.
df_input_upload_responses = spark.read.load(input_upload_responses_path, format="delta")
display(df_input_upload_responses)

In [0]:
# End the notebook run here, handing this message back as the exit value.
# NOTE(review): the appendix cells below are presumably not reached on a
# top-to-bottom run because of this exit - confirm that is intended.
dbutils.notebook.exit("Notebook execution completed successfully.")

## Appendix

In [0]:
# dbutils.fs.rm("/mnt/autoLoaderSchema/ARMAPPEALS/response", True)

In [0]:
# dbutils.fs.ls("/mnt/ingest00curatedsboxsilver/ARIADM/ARM/response/APPEALS/")

In [0]:
# dbutils.fs.rm("/mnt/ingest00curatedsboxsilver/ARIADM/ARM/response/APPEALS/", True)

In [0]:
# display(dbutils.fs.ls("/mnt/dropzoneariab/ARIAB/response/"))

In [0]:
# Ad-hoc check: list what is registered in the secret scope used by this notebook.
# NOTE(review): this is the only live cell in the appendix and it sits after
# dbutils.notebook.exit(...); it looks like leftover debugging - confirm
# whether it should be kept.
dbutils.secrets.list(scope="ingest00-meta002-sbox")

In [0]:

# from azure.storage.blob import ContainerClient

# sas_token = dbutils.secrets.get(scope="ingest00-meta002-sbox", key="ARIATD-SAS-TOKEN")
# storage_account_name = "a360c2x2555dz"
# container_name = "dropzone"
# sub_dir = "ARIATD/submission"
# account_url = f"https://{storage_account_name}.blob.core.windows.net"


In [0]:
# html_df = spark.sql("""
# select CaseNo, HTMLContent as content_str, substring_index(HTMLFileName, '/', -1) as file_path from ariadm_arm_td.gold_td_iris_with_html
# where CaseNo = 'FY/46144/2015'
# """)

# json_df = spark.sql("""
#  select CaseNo, JSONcollection  as content_str, substring_index(JSONFileName, '/', -1) as file_path from ariadm_arm_td.gold_td_iris_with_Json
#   where CaseNo = 'FY/46144/2015';
# """)

# a360_df = spark.sql("""
# select CaseNo, A360Content  as content_str, substring_index(A360FileName, '/', -1) as file_path from ariadm_arm_td.gold_td_iris_with_a360
#  where CaseNo = 'FY/46144/2015';
# """)

# display(html_df)
# display(json_df)
# display(a360_df)

In [0]:

# from datetime import datetime

# results = []

# df = a360_df


# # Callback generator to capture responses for each file upload
# def capture_response(result):
#     def hook(response):
#         http_response = response.http_response
#         result["http_response"] = http_response.status_code
#         result["http_message"] = getattr(http_response, "reason", "No reason provided")
#         result["timestamp"] = datetime.now().isoformat()
#     return hook

# try:
#     # Create a ContainerClient using a container-level SAS token
#     container_client = ContainerClient(
#         account_url=account_url,
#         container_name=container_name,
#         credential=sas_token
#     )
#     print("Container client created")

#     for row in df.collect():
#         message = row.content_str
#         file_path = row.file_path

#         file_name= f"{sub_dir}/{file_path}"

#         blob_client = container_client.get_blob_client(file_name)

#         result = {
#             "filename": file_name,
#             "http_response": None,
#             "timestamp": None,
#             "http_message": None
#         }

#         print("uploading message to blob storage",file_name)
#         blob_client.upload_blob(message,overwrite=True,raw_response_hook=capture_response(result))
#         print("upload complete")

#         results.append(result)


# except Exception as e:
#     print("Failed to connect to blob storage:", e)



In [0]:
# display(dbutils.fs.ls("/mnt/dropzoneariatd/ARIATD/submission/"))

In [0]:
# dbutils.fs.ls("/mnt/dropzoneariatd/ARIATD/response")
