# Simple Microsoft Fabric Items Retrieval via REST API
Last Updated: March 2025
Purpose: 
- Retrieve all Fabric items efficiently with minimal Capacity Unit usage
- Store results in Delta format for analysis and tracking
 
IMPORTANT NOTES:
- As of March 2025, Dataflow Gen2 items are not included in the Fabric items returned by this API
- This only retrieves Fabric Items, not Power BI Items
- Running this as a Python notebook consumes significantly fewer Capacity Units than running it as a Spark notebook

In [None]:
import notebookutils
import json
import requests
import pandas as pd
import datetime
from datetime import datetime, date, timedelta
import time
import duckdb
from deltalake import write_deltalake, DeltaTable
import pyarrow

# SECTION 1: Authentication Setup

In [None]:
# Retrieve secrets from Azure Key Vault for secure authentication
# The key vault stores sensitive information like tenant ID, application ID and client secret
key_vault = "https://company-keyvault.vault.azure.net/"

# Tenant ID for the Azure AD tenant housing the Fabric workspace
tenant = notebookutils.credentials.getSecret(key_vault, "tenantid") 

# Application ID for the service principal with appropriate Fabric permissions
client = notebookutils.credentials.getSecret(key_vault, "powerbi-applicationid") 

# Client secret for authentication with the service principal
client_secret = notebookutils.credentials.getSecret(key_vault, "powerbi-clientsecret")  

# Import required authentication libraries with error handling
try: 
    from azure.identity import ClientSecretCredential 
except Exception:
    # Install the library if not available
    !pip install azure.identity 
    from azure.identity import ClientSecretCredential 

# Set up authentication parameters
# Using the Power BI API scope since Fabric API leverages the same authentication framework
api = 'https://analysis.windows.net/powerbi/api/.default' 

# Create the credential object for service principal authentication
auth = ClientSecretCredential(
    authority = 'https://login.microsoftonline.com/', 
    tenant_id = tenant, 
    client_id = client, 
    client_secret = client_secret
) 

# Retrieve the access token that will be used for all API calls
access_token = auth.get_token(api)
access_token = access_token.token 

# Set up HTTP headers with the access token for API authentication
header = {'Authorization': f'Bearer {access_token}'}  

print('\nSuccessfully authenticated to Microsoft Fabric API.')

# SECTION 2: API Data Retrieval

In [None]:
# Microsoft Fabric admin endpoint that lists items.
# The request is authenticated with the service principal token obtained in Section 1.
FABRIC_API_URL = 'https://api.fabric.microsoft.com/v1/admin/items'

# Seconds to wait on a single HTTP request before requests raises a timeout
# error, so an unresponsive endpoint cannot hang the notebook indefinitely.
REQUEST_TIMEOUT_SECONDS = 60

# Set up headers with authentication token and content type
headers = {
    'Authorization': 'Bearer ' + access_token,
    'Content-Type': 'application/json'
}

# Accumulates the items from every page of results
all_fabric_items = []

# URL of the page to fetch next; replaced by the continuation link after each page
current_api_url = FABRIC_API_URL

print("Starting Fabric items retrieval...")
page_count = 0

# Pagination loop: keep requesting pages until the response carries no link to a next page
while current_api_url:
    # Make the API request for the current page
    response = requests.get(
        current_api_url,
        headers=headers,
        timeout=REQUEST_TIMEOUT_SECONDS,
    )

    # Stop on any non-success status; items collected so far are kept
    if response.status_code != 200:
        print(f"Error retrieving data: Status code {response.status_code}")
        print(f"Response: {response.text}")
        break

    # Parse the JSON response
    data = response.json()
    page_count += 1

    # Extract and store the Fabric items from the current page
    items_in_page = data.get('itemEntities', [])
    all_fabric_items.extend(items_in_page)
    print(f"Retrieved page {page_count} with {len(items_in_page)} items")

    # Fabric REST APIs return the next page's URL in 'continuationUri'.
    # '@odata.nextLink' is kept as a fallback for OData-style responses.
    # When neither key is present the value is None and the loop exits.
    current_api_url = data.get('continuationUri') or data.get('@odata.nextLink')

    # Small pause between pages to reduce the chance of being rate limited
    if current_api_url:
        time.sleep(0.5)

print(f"Completed retrieval of {len(all_fabric_items)} Fabric items across {page_count} pages")

# SECTION 3: Save Raw JSON Data

In [None]:
# Make sure the snapshot folder exists in the lakehouse Files area
notebookutils.fs.mkdirs("Files/Fabric_Items/")

# One snapshot file per calendar day: the name embeds today's date (YYYYMMDD),
# and the file is opened in "w" mode, so a rerun on the same day replaces it.
fileName = f"Fabric_Items_{datetime.today():%Y%m%d}.json"
file_path = "/lakehouse/default/Files/Fabric_Items/" + fileName

# Dump every retrieved item to the snapshot file as indented JSON
try:
    with open(file_path, "w") as snapshot_file:
        json.dump(all_fabric_items, snapshot_file, indent=4)
    print(f"Successfully saved raw data to {file_path}")
except Exception as save_error:
    # Report the failure and let the notebook continue
    print(f"Error saving JSON file: {save_error}")

# SECTION 4: Process and Store in Delta Format

In [None]:
# Read back the JSON snapshot written in Section 3, flatten it into a
# DataFrame and overwrite the staging Delta table with the result.
try:
    with open(file_path) as snapshot_file:
        data = json.load(snapshot_file)

    # json_normalize expands nested objects into flat, dot-separated columns
    df = pd.json_normalize(data)
    row_count, column_count = df.shape
    print(f"Successfully loaded and normalized data with {row_count} rows and {column_count} columns")

    # Options handed to the Delta writer; the bearer token is a storage token
    # issued by notebookutils
    storage_options = dict(
        use_fabric_endpoint="true",
        allow_unsafe_rename="true",
        bearer_token=notebookutils.credentials.getToken('storage'),
    )

    # Location of the staging table that Section 5 reads as its merge source
    staging_path = "/lakehouse/default/Tables/staging_all_fabric_items"

    # 'overwrite' replaces whatever an earlier run left in the staging table
    write_deltalake(
        staging_path,
        df,
        storage_options=storage_options,
        engine='rust',
        mode="overwrite",
    )
    print(f"Successfully wrote data to staging table at {staging_path}")

except Exception as processing_error:
    # Report the failure and let the notebook continue
    print(f"Error in data processing or Delta write: {processing_error}")

# SECTION 5: Merge Data with Existing Records (Upsert Operation)

In [None]:
# Upsert the staging table into the production table using an in-memory
# DuckDB database, then write the merged result back to Delta storage.
import os

# In-memory DuckDB connection used for the merge
con = duckdb.connect()

# Install and load the Delta extension to enable DuckDB to work with Delta tables
try:
    con.execute("INSTALL delta;")
    con.execute("LOAD delta;")
    print("DuckDB Delta extension loaded successfully")
except Exception as e:
    print(f"Error loading Delta extension: {e}")

# Define paths for source (staging) and target (production) tables
source_path = "/lakehouse/default/Tables/staging_all_fabric_items"
target_path = "/lakehouse/default/Tables/all_fabric_items"

try:
    # Copy the staging Delta table into a DuckDB table
    con.execute(f"CREATE TABLE source_table AS SELECT * FROM delta_scan('{source_path}')")

    # Decide whether this is the first run by looking for the table's
    # _delta_log folder instead of treating *any* read error as "table missing".
    # Otherwise a transient or permission error while reading an existing
    # production table would be mistaken for a first run, and the overwrite
    # below would replace its history with the staging rows only.
    target_exists = os.path.isdir(os.path.join(target_path, "_delta_log"))

    if target_exists:
        # A failure here propagates to the outer handler, so the production
        # table is left untouched when it cannot be read.
        con.execute(f"CREATE TABLE target_table AS SELECT * FROM delta_scan('{target_path}')")

        # Update existing records.
        # NOTE(review): only name and type are refreshed for ids that already
        # exist; every other column keeps its previously stored value -
        # confirm this is intended.
        con.execute("""
            UPDATE target_table AS t
            SET name = s.name,
                type = s.type
            FROM source_table AS s
            WHERE t.id = s.id
        """)

        # Insert records whose id is not in the target yet.
        # BY NAME matches columns by name rather than position, so a different
        # column order in the staging table cannot shift values into the
        # wrong columns.
        con.execute("""
            INSERT INTO target_table BY NAME (
                SELECT s.*
                FROM source_table AS s
                LEFT JOIN target_table AS t ON s.id = t.id
                WHERE t.id IS NULL
            )
        """)

        # Get the final dataset
        result = con.execute("SELECT * FROM target_table").fetchdf()
    else:
        print("Target table doesn't exist yet - this appears to be the first run")
        # For first run, just use the source data
        result = con.execute("SELECT * FROM source_table").fetchdf()

    print(f"Final dataset has {len(result)} rows")
    display(result)

    # Overwrite the production table in place with the merged dataset
    write_deltalake(
        target_path,
        result,
        engine='rust',
        mode="overwrite",
        storage_options={"allow_unsafe_rename":"true"}
    )
    print("Successfully updated the production Fabric items table")

except Exception as e:
    # Report the failure; the production table is only written on success
    print(f"Error during merge operation: {e}")