In [35]:
# pip install delta-spark
# % pip install azure-storage-file-datalake azure-identity

StatementMeta(, 760dc443-9a5d-4bea-8f46-719d5e49a254, 37, Finished, Available, Finished)

In [36]:
import json, requests
from datetime import date
from azure.identity import DefaultAzureCredential
from azure.storage.filedatalake import (
    DataLakeServiceClient,
    DataLakeDirectoryClient,
    FileSystemClient
)

StatementMeta(, 760dc443-9a5d-4bea-8f46-719d5e49a254, 38, Finished, Available, Finished)

In [37]:
def get_service_client_token_credential(self, account_name) -> DataLakeServiceClient:
    account_url = f"https://{account_name}.dfs.fabric.microsoft.com"
    token_credential = DefaultAzureCredential()

    service_client = DataLakeServiceClient(account_url, credential=token_credential)

    return service_client

StatementMeta(, 760dc443-9a5d-4bea-8f46-719d5e49a254, 39, Finished, Available, Finished)

In [40]:
import requests

# Define the Admin API endpoint
base_url = "https://api.fabric.microsoft.com/v1/admin"
endpoint = "tenantsettings"  # Example: List all workspaces
url = f"{base_url}/{endpoint}"

# Use the built-in authentication token
# Fabric Notebooks automatically provide the token in the environment
from notebookutils import mssparkutils
access_token = mssparkutils.credentials.getToken("https://analysis.windows.net/powerbi/api/.default")

# Set up the headers with the access token
headers = {
    "Authorization": f"Bearer {access_token}",
    "Content-Type": "application/json"
}

# Make the API call
response = requests.get(url, headers=headers)

# Check the response
if response.status_code == 200:
    print("API call successful!")
    # print(response.json())  # Print the response data
else:
    print(f"API call failed with status code {response.status_code}")    
    print(response.text)


StatementMeta(, 760dc443-9a5d-4bea-8f46-719d5e49a254, 42, Finished, Available, Finished)

API call successful!


In [41]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import current_date, date_format, lit, explode, col
import requests
import json
from datetime import datetime

# Initialize Spark session
spark = SparkSession.builder.appName("TenantSettings").getOrCreate()

# Path to save the file
path = 'Files/TenantSettings/'

# Get the current date in "YYYYMMDD" format
activity_date = datetime.now().strftime("%Y%m%d")

# API call to fetch tenant settings
TenantSettingsURL = 'https://api.fabric.microsoft.com/v1/admin/tenantsettings'
TenantSettingsJSON = requests.get(TenantSettingsURL, headers=headers)

if TenantSettingsJSON.status_code != 200:
    print(f"Failed to retrieve tenant settings! {TenantSettingsJSON.status_code}")
else:
    TenantSettingsJSONContent = json.loads(TenantSettingsJSON.text)
    TenantSettingsJSONContentExplode = TenantSettingsJSONContent['tenantSettings']
    print(TenantSettingsJSONContentExplode)

    # Create a Spark DataFrame from the JSON content
    df = spark.createDataFrame(TenantSettingsJSONContentExplode)

    # Drop the enabledSecurityGroups and properties columns to avoid duplicates
    df = df.drop("enabledSecurityGroups","properties")    

    # Add the ExportedDate column with a literal value
    df = df.withColumn("ExportedDate", lit(activity_date))   

    # Save the DataFrame as a CSV file
    df.write.mode("overwrite").format("csv").option("header", "true").save(path + activity_date + '_TenantSettings.csv')
    display(df)


StatementMeta(, 760dc443-9a5d-4bea-8f46-719d5e49a254, 43, Finished, Available, Finished)

[{'settingName': 'AllowServicePrincipalsUseReadAdminAPIs', 'title': 'Service principals can access read-only admin APIs', 'enabled': True, 'canSpecifySecurityGroups': True, 'enabledSecurityGroups': [{'graphId': 'ac0c8811-771c-4899-8471-c1072429a70d', 'name': 'globaladmins'}, {'graphId': 'e37071df-2e5a-4775-b185-fd43b871eed6', 'name': 'fuam-sg'}], 'tenantSettingGroup': 'Admin API settings'}, {'settingName': 'AllowServicePrincipalsUseWriteAdminAPIs', 'title': 'Service principals can access admin APIs used for updates', 'enabled': False, 'canSpecifySecurityGroups': True, 'tenantSettingGroup': 'Admin API settings'}, {'settingName': 'AdminApisIncludeDetailedMetadata', 'title': 'Enhance admin APIs responses with detailed metadata', 'enabled': True, 'canSpecifySecurityGroups': True, 'tenantSettingGroup': 'Admin API settings'}, {'settingName': 'AdminApisIncludeExpressions', 'title': 'Enhance admin APIs responses with DAX and mashup expressions', 'enabled': True, 'canSpecifySecurityGroups': Tru

SynapseWidget(Synapse.DataFrame, d7a4735d-5be4-4efa-83a8-114a7b8d85e4)

In [43]:
from delta.tables import DeltaTable
from pyspark.sql import SparkSession  
from pyspark.sql.functions import current_timestamp
from datetime import datetime


# Get the current date in "YYYYMMDD" format
activity_date = datetime.now().strftime("%Y%m%d")

# Initialize Spark session
spark = SparkSession.builder \
    .appName("MergeDataFrameToDelta") \
    .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") \
    .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog") \
    .getOrCreate()

# Read the saved CSV file back into a Spark DataFrame
df_read = spark.read.format("csv").option("header", "true").load(path + activity_date + '_TenantSettings.csv')

# Path to your Delta table in OneLake
delta_table_path = "Tables/TenantSettings/"

# Add a new column for the current timestamp
df_read = df_read.withColumn("lastModifiedDate", current_timestamp())

# Check if the Delta table exists
if DeltaTable.isDeltaTable(spark, delta_table_path):
    # Load the Delta table
    delta_table = DeltaTable.forPath(spark, delta_table_path)

   # Perform the merge operation with the timestamp column
    delta_table.alias("target").merge(
        source=df_read.alias("source"),
        condition="target.settingName = source.settingName"  # Match condition
    ).whenMatchedUpdate(
        
        condition="""
            target.title != source.title OR
            target.enabled != source.enabled OR
            target.canSpecifySecurityGroups != source.canSpecifySecurityGroups OR            
            target.delegateToWorkspace != source.delegateToWorkspace OR
            target.tenantSettingGroup != source.tenantSettingGroup OR
            target.delegateToCapacity != source.delegateToCapacity OR
            target.delegateToDomain != source.delegateToDomain
        """,

        set={
            "settingName": "source.settingName",
            "title": "source.title",
            "enabled": "source.enabled",
            "canSpecifySecurityGroups": "source.canSpecifySecurityGroups",
            "ExportedDate": "source.ExportedDate",
            "delegateToWorkspace": "source.delegateToWorkspace",
            "tenantSettingGroup": "source.tenantSettingGroup",
            "delegateToCapacity": "source.delegateToCapacity",            
            "delegateToDomain": "source.delegateToDomain",
            "lastModifiedDate": "current_timestamp()"  # Update timestamp    
        }
    ).whenNotMatchedInsert(
        values={           
            "canSpecifySecurityGroups": "source.canSpecifySecurityGroups",
            "settingName": "source.settingName",
            "tenantSettingGroup": "source.tenantSettingGroup",
            "title": "source.title",
            "enabled": "source.enabled",
            "canSpecifySecurityGroups": "source.canSpecifySecurityGroups",
            "delegateToCapacity": "source.delegateToCapacity",            
            "delegateToDomain": "source.delegateToDomain",
            "delegateToWorkspace": "source.delegateToWorkspace",
            "ExportedDate": "source.ExportedDate",
            "lastModifiedDate": "current_timestamp()"  # Insert timestamp   
        }
    ).execute()
else:
    # If the Delta table doesn't exist, write the DataFrame as a new Delta table
    df_read.write.format("delta").mode("overwrite").save(delta_table_path)

    # Refresh the Delta table metadata
    spark.sql(f"REFRESH TABLE delta.`{delta_table_path}`")

    # Enable Change Data Feed using SQL
    spark.sql(f"""
        ALTER TABLE delta.`{delta_table_path}`
        SET TBLPROPERTIES (delta.enableChangeDataFeed = true)
    """)

    print("Change Data Feed (CDF) enabled successfully!")

print("Merge operation completed successfully!")


StatementMeta(, 760dc443-9a5d-4bea-8f46-719d5e49a254, 45, Finished, Available, Finished)

Merge operation completed successfully!
