The Python library base64 provides functions to encode binary data into ASCII characters using Base64 encoding and decode Base64-encoded data back into its original binary form.

In [28]:
import requests
import json
import base64
import os
from datetime import datetime
from pyspark.sql import functions as F
from pyspark.sql.functions import current_timestamp, date_format, col, expr

StatementMeta(, 960db8d9-7f9e-4db1-8a64-309cb46bcedf, 37, Finished, Available, Finished)

### Accessing the API 

Line 7 of the code below (the `credentials = ...` statement) takes the `identifier` string, converts it to bytes, encodes those bytes using Base64, and then decodes the Base64-encoded bytes back into a string. The resulting `credentials` string therefore contains the Base64-encoded representation of the original `identifier` string.

In [None]:
# Get client credentials from Azure Key Vault (NOTE(review): both lookups use the same placeholder secret name - confirm each reads its own secret)
client_id = mssparkutils.credentials.getSecret('AZURE_KEY_VAULT_URL','SECRET_ID')
client_secret = mssparkutils.credentials.getSecret('AZURE_KEY_VAULT_URL','SECRET_ID')

# Construct client credentials string and encode in Base64
identifier = f"{client_id}:{client_secret}"
credentials = base64.b64encode(identifier.encode()).decode()

# Token endpoint URL
token_endpoint = "URL"

# Request body parameters
params = {
    "scope": "",
    "grant_type": ""
}

# Request headers: form-encoded body, Basic auth carrying the encoded credentials
headers = {
    "Content-Type": "application/x-www-form-urlencoded",
    "Authorization": "Basic " + credentials
}

# Send HTTP POST request; requests has no default timeout, so set one to
# keep the cell from hanging forever on an unresponsive endpoint
response = requests.post(token_endpoint, data=params, headers=headers, timeout=30)

# Stop on a 4xx/5xx reply instead of parsing an error body as if it were a token
response.raise_for_status()

# Parse response JSON
token_data = response.json()

# Extract access token; fail here rather than sending "Bearer None" later.
# The response body is deliberately not echoed, it may hold sensitive values.
access_token = token_data.get("access_token")
if not access_token:
    raise ValueError("Token endpoint response did not contain an access_token")

#### The request endpoint requires 'from' and 'to' date values in a specific format to retrieve data. These values are stored after each pull so that the next daily call can use the previous 'to' date as its 'from' date.

In [None]:
# Root folder that is searched for the most recent dated pull folder
path = "/lakehouse/default/Files/"

# Timestamp layout used for both the 'todate' and 'fromdate' query parameters
date_pattern = "yyyy-MM-dd'T'HH:mm:ss.SSSSS"

try:
    # NOTE(review): presumably returns the folder name relative to `path` - confirm
    newest_folder = find_latest_date_folder(path)  # Function defined in Functions Notebook
except Exception as e:
    print(f"Folder hirarchy does not exist: {e}.")
    newest_folder = None

# Evaluate the current timestamp once and derive both ends of the default
# 24-hour window in a single Spark job, so the two values are exactly 24 hours
# apart (two separate jobs would each re-evaluate current_timestamp()).
current_datetime_df = spark.range(1).select(current_timestamp().alias("current_datetime"))
window_row = current_datetime_df.select(
    date_format(col("current_datetime"), date_pattern).alias("to_date"),
    date_format(expr("current_datetime - interval 24 hours"), date_pattern).alias("from_date"),
).collect()[0]

# The upper bound is always "now", whichever branch supplies the lower bound
to_date_str = window_row["to_date"]

if newest_folder is not None:
    # If a previous pull exists, use its stored toDate as fromDate in the current GET request.
    # `path` already ends with "/", so strip it to avoid a doubled separator.
    last_pull_path = f"{path.rstrip('/')}/{newest_folder}/{newest_folder}_date_info.json"
    df = spark.read.json(last_pull_path).select("toDate")
    last_pull_date = df.collect()
    from_date_str = last_pull_date[0]["toDate"]
else:
    # No previous pull: fall back to the 24 hours leading up to now
    from_date_str = window_row["from_date"]


# GET request for endpoint Journeys
endpoint = f"URL?todate={to_date_str}&fromdate={from_date_str}"

headers = {
    "Authorization": "Bearer " + str(access_token)
}

# requests has no default timeout; set one so the cell cannot hang indefinitely
request = requests.get(endpoint, headers=headers, timeout=60)

# Fail on a 4xx/5xx reply so an error payload is never stored as data
request.raise_for_status()
data = request.json()

#### Save JSON to bronze layer

In [None]:
# Persist the API payload and the date window it covers into a folder named
# after today's date.
try:
    # NOTE(review): the folder name uses the driver's local clock, while toDate
    # was produced by Spark - confirm both use the same time zone.
    current_date = datetime.now().strftime("%Y%m%d")

    # Define the directory for the output file; exist_ok makes this a single
    # call with no gap between an existence check and the creation
    output_dir = f"/lakehouse/default/Files/Data/{current_date}"
    os.makedirs(output_dir, exist_ok=True)

    # Define the output file path
    output_file = f"{output_dir}/{current_date}_data.json"

    # Write the JSON data to the output file
    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(data, f)

    # Check the file size
    file_size = os.path.getsize(output_file)
    print("JSON data has been exported to:", output_file)
    print("File size:", file_size, "bytes")

    # Companion file recording the window of this pull; the date-window cell
    # reads "toDate" from a *_date_info.json file to start the next pull
    date_info_file = f"{output_dir}/{current_date}_date_info.json"

    date_info = {
        "Info": "Data captured between",
        "fromDate": from_date_str,
        "toDate": to_date_str
    }

    with open(date_info_file, "w", encoding="utf-8") as f:
        json.dump(date_info, f)

except Exception as e:
    print("An error occurred:", e)
    # Re-raise so a failed save marks the run as failed instead of looking
    # like a successful pull with nothing written
    raise