In [0]:
# Declare the notebook's text input widgets as (widget name, default value, label).
for _widget_name, _widget_default, _widget_label in (
    ("proc_name", "", "Process Name"),
    ("proc_run_id", "", "Process Run ID"),
    ("data_dt", "", "Data Date"),
):
    dbutils.widgets.text(_widget_name, _widget_default, _widget_label)

# Read the current widget values into module-level variables, in declaration order.
proc_name, proc_run_id, data_dt = (
    dbutils.widgets.get(_key) for _key in ("proc_name", "proc_run_id", "data_dt")
)

In [0]:
%run /Workspace/Users/themallpocaws@inteltion.com/Data-Integration-Scenarios/00_common/common_function

##### Save file to S3

In [0]:
%pip install azure-storage-blob


In [0]:
from azure.storage.blob import BlobServiceClient
import boto3
from botocore.exceptions import ClientError
from io import StringIO, BytesIO


In [0]:
def get_secret():
    """Fetch the Azure Storage account secret from AWS Secrets Manager.

    Looks up the secret ``TMG/POC/Azure_Storage_Account`` in region
    ``ap-southeast-1`` and decodes its ``SecretString`` as JSON.

    Returns:
        The JSON-decoded ``SecretString``. The caller in this notebook reads
        its ``"AccountKey"`` entry, so a mapping is presumably expected.

    Raises:
        botocore.exceptions.ClientError: propagated unchanged from
            ``get_secret_value``. For the list of error codes, see
            https://docs.aws.amazon.com/secretsmanager/latest/apireference/API_GetSecretValue.html
    """
    # Imported locally because this notebook's import cell does not import
    # json; without this the function depends on another notebook having
    # imported it into the shared namespace.
    import json

    secret_name = "TMG/POC/Azure_Storage_Account"
    region_name = "ap-southeast-1"

    # Create a Secrets Manager client
    session = boto3.session.Session()
    client = session.client(
        service_name='secretsmanager',
        region_name=region_name
    )

    # Errors from the service are intentionally not caught here: catching
    # only to re-raise adds nothing, so they propagate to the caller as-is.
    get_secret_value_response = client.get_secret_value(SecretId=secret_name)

    return json.loads(get_secret_value_response['SecretString'])

##### Get Azure storage account credentials

In [0]:
# Azure Storage connection details.
# The storage account name is a fixed literal; the account key is read from
# the secret returned by get_secret().
AccountName = 'presalesstint'
secret = get_secret()
AccountKey = secret["AccountKey"]


In [0]:
# Export poc_dwh.poc.poc_fact_sales to S3 as a single CSV file.
# Defines _data_dt, final_output and archived_output, which the Azure sync
# cell reads afterwards. Any failure is printed and re-raised.
batch_status = set_process_success()
try:
    # Read the table from Unity Catalog
    df = spark.table("poc_dwh.poc.poc_fact_sales")
    # Dashes are stripped so the value can be embedded in the file name
    # (assumes data_dt looks like YYYY-MM-DD — TODO confirm).
    _data_dt = data_dt.replace("-", "")
    # Reduce to a single partition so that you get one CSV file
    df_single = df.coalesce(1)
    temp_output = 's3://tmg-poc-awsdb-apse1-stack-97db8-bucket/unity-catalog/catalog/poc_landing/temp_outbound/azure_storage/'
    # Write the DataFrame to S3 as comma-delimited CSV with a header row,
    # replacing whatever a previous run left in the temporary directory.
    df_single.write \
        .option("delimiter", ",") \
        .option("header", "true") \
        .mode("overwrite") \
        .csv(temp_output)

    # List the files in the temporary directory
    files = dbutils.fs.ls(temp_output)

    # Pick the data file by its ".csv" suffix; other entries in the
    # directory are ignored.
    csv_files = [f.path for f in files if f.name.endswith(".csv")]
    if not csv_files:
        # Fail with a descriptive message instead of a bare IndexError.
        raise FileNotFoundError(f"No .csv part file found in {temp_output}")
    csv_file = csv_files[0]

    final_output = f"s3://tmg-poc-awsdb-apse1-stack-97db8-bucket/unity-catalog/catalog/poc_outbound/azure_storage/3_POC_Fact_Sales_{_data_dt}.csv"
    archived_output = f"s3://tmg-poc-awsdb-apse1-stack-97db8-bucket/unity-catalog/catalog/poc_outbound/azure_storage/archived/3_POC_Fact_Sales_{_data_dt}.csv"
    # Move the part file to its final, date-stamped name.
    dbutils.fs.mv(csv_file, final_output)
except Exception as err:
    print(f'error : {err}')
    # Bare raise re-raises the active exception without touching its traceback.
    raise

##### Sync to Azure Storage

In [0]:
# Upload the exported CSV to Azure Blob Storage, then move the S3 copy to the
# archive location. Reads AccountName, AccountKey, _data_dt, final_output,
# archived_output and batch_status set by earlier cells. Failures are recorded
# via set_process_failed and reported through dbutils.notebook.exit rather
# than raised.
local_file_path = None
try:

    connection_string = f"DefaultEndpointsProtocol=https;AccountName={AccountName};AccountKey={AccountKey};EndpointSuffix=core.windows.net"

    # Specify the container name and blob (file) name for Azure
    container_name = "tmg-poc"
    blob_name = f"3_POC_Fact_Sales_{_data_dt}.csv"

    # S3 file URI (source)
    s3_uri = final_output
    # Stage on the driver's local disk under a per-export name so that runs
    # for different dates sharing a driver do not overwrite each other's file.
    # Any "/" is replaced to keep the staging file directly under /tmp.
    local_file_path = "/tmp/" + blob_name.replace("/", "_")
    dbutils.fs.cp(s3_uri, "file:" + local_file_path)

    # Create the BlobServiceClient object to interact with Azure Blob Storage
    blob_service_client = BlobServiceClient.from_connection_string(connection_string)

    # Get a blob client for the target container and blob name
    blob_client = blob_service_client.get_blob_client(container=container_name, blob=blob_name)

    # Open the local file in binary mode and upload it to Azure Blob Storage
    with open(local_file_path, "rb") as data:
        blob_client.upload_blob(data, overwrite=True)

    print("File uploaded successfully!")
    # Archive the S3 copy only after the upload has succeeded.
    dbutils.fs.mv(final_output, archived_output)
except Exception as e:
    print(f"An error occurred: {e}")
    batch_status = set_process_failed(e)
finally:
    # Best-effort removal of the local staging file; a cleanup problem must
    # not prevent the status from being reported below.
    if local_file_path is not None:
        try:
            dbutils.fs.rm("file:" + local_file_path)
        except Exception as cleanup_err:
            print(f"Could not remove local staging file {local_file_path}: {cleanup_err}")

dbutils.notebook.exit(batch_status)
