#### Import Libraries

In [None]:
import requests
from azure.identity import ClientSecretCredential

from azure.identity import DefaultAzureCredential
from azure.keyvault.secrets import SecretClient

import requests, time, pandas as pd, concurrent.futures as cf


#### Define variables

In [None]:
# Key Vault URL handed to SecretClient when reading the service principal
# (SP) credentials.
KEY_VAULT_URI = "your-keyvault-url"

# Names of the Key Vault secrets holding each SP credential.
TENANT_ID_SECRET = "TenantID"
CLIENT_ID_SECRET = "ClientID"
CLIENT_SECRET_SECRET = "ClientSecret"

# Workspace id used by the cleanup queries to identify archived reports.
ARCHIVE_WORKSPACE_ID = "YOUR_ARCHIVE_WORKSPACE_ID"

#### Get SP credentials from Key Vault

In [None]:
class _FabricKVToken:
    """Credential adapter backed by the notebook's ``mssparkutils`` helper.

    An instance is passed as ``credential`` to ``SecretClient``; it only
    provides the ``get_token`` method.
    """

    def get_token(self, *scopes, **kwargs):
        """Return a token object for ``https://vault.azure.net``.

        ``scopes`` and ``kwargs`` are accepted but ignored: the audience is
        always the fixed Key Vault URL.

        Returns:
            An object with two attributes: ``token`` (the value returned by
            ``mssparkutils.credentials.getToken``) and ``expires_on``
            (current epoch seconds plus 3600).
        """
        raw_token = mssparkutils.credentials.getToken("https://vault.azure.net")
        # Expiry is hard-coded to one hour from now rather than read from the
        # token itself.
        expiry = int(time.time()) + 3600
        token_type = type("Tok", (), {"token": raw_token, "expires_on": expiry})
        return token_type()

# ---------  Read SP credentials from Key Vault ----------
kv = SecretClient(vault_url=KEY_VAULT_URI, credential=_FabricKVToken())


def _read_secret(secret_name):
    """Return the value of the Key Vault secret called *secret_name*."""
    return kv.get_secret(secret_name).value


# Secrets are fetched in the same order as before: tenant, client id, secret.
TENANT_ID = _read_secret(TENANT_ID_SECRET)
CLIENT_ID = _read_secret(CLIENT_ID_SECRET)
CLIENT_SECRET = _read_secret(CLIENT_SECRET_SECRET)

#### Delete Archived Reports

In [None]:
# Delete reports in the archive workspace that were created more than 30 days
# ago and are not yet flagged as deleted, then stamp the archive history.
#
# Fixes compared with the previous version of this cell:
#   * the query now returns `Report Id` (it used to return `Dataset Id`, which
#     was then sent to the /reports/{id} endpoint and the ReportId update);
#   * the token comes from ClientSecretCredential, which this notebook already
#     imports (the old code used `msal` and `SCOPE`, neither of which exists);
#   * the token is acquired once instead of once per report;
#   * the history table is only updated when the delete call succeeded.
df_reports = spark.sql(f"""
    SELECT 
        `Report Id`
    FROM LH_Monitoring.dbo.dim_reports
    WHERE `Workspace Id` = '{ARCHIVE_WORKSPACE_ID}'
      AND IsDeleted = 0
      AND `Created Date` < date_sub(current_timestamp(), 30)
""")

# One column per row, so take the first field of each row.
report_list = [row[0] for row in df_reports.collect()]

# Get token for Fabric REST (once; it is reused for every delete call).
SCOPE = ["https://api.fabric.microsoft.com/.default"]
fabric_cred = ClientSecretCredential(
    tenant_id=TENANT_ID, client_id=CLIENT_ID, client_secret=CLIENT_SECRET
)
access_token = fabric_cred.get_token(*SCOPE).token
fabric_headers = {"Authorization": f"Bearer {access_token}"}

# Avoid hanging the notebook forever on a stalled connection.
REPORT_DELETE_TIMEOUT_SECONDS = 60

for REPORT_ID in report_list:
    # Call Delete Report
    url = f"https://api.fabric.microsoft.com/v1/workspaces/{ARCHIVE_WORKSPACE_ID}/reports/{REPORT_ID}"
    resp = requests.delete(url, headers=fabric_headers, timeout=REPORT_DELETE_TIMEOUT_SECONDS)

    if resp.status_code not in (200, 202, 204):
        # Leave the history row untouched so the report is retried next run.
        print(f"✖ Delete failed for report {REPORT_ID}: {resp.status_code} {resp.text[:300]}")
        continue

    print(f"✔ Deleted report {REPORT_ID} in workspace {ARCHIVE_WORKSPACE_ID}")
    spark.sql(f"UPDATE report_archive_history SET DeletedOn = now() WHERE ReportId = '{REPORT_ID}'")

#### Delete Unused Datasets

In [None]:
# Delete datasets that are referenced by archived reports (created more than
# 5 days ago, not flagged deleted) and by no live report outside the archive
# workspace.
#
# Fixes compared with the previous version of this cell:
#   * `=!` is not a SQL operator; the join condition now uses `<>`;
#   * the id list is built from the first column only (flattening every
#     column mixed the Report_Count values into the list of dataset ids,
#     which then failed the per-dataset workspace lookup);
#   * the workspace id is selected in the same query instead of running one
#     extra Spark query per dataset.

# Get Token
scope = "https://analysis.windows.net/powerbi/api/.default"
cred = ClientSecretCredential(tenant_id=TENANT_ID, client_id=CLIENT_ID, client_secret=CLIENT_SECRET)
token = cred.get_token(scope).token
headers = {"Authorization": f"Bearer {token}"}

# NOTE(review): assumes dim_datasets holds one row per dataset id, so that
# grouping by workspace as well does not split a dataset into several rows.
df_datasets = spark.sql(f"""
SELECT 
    d.`Dataset Id`, 
    d.`Workspace Id`,
    COUNT(r.`Report Id`) AS Report_Count
FROM dim_datasets d
LEFT JOIN dim_reports r 
    ON d.`Dataset Id` = r.`Dataset Id`
    AND r.IsDeleted = 0
    AND r.`Workspace Id` <> '{ARCHIVE_WORKSPACE_ID}'
WHERE d.`Dataset Id` IN (
    SELECT 
        `Dataset Id`
    FROM LH_Monitoring.dbo.dim_reports
    WHERE `Workspace Id` = '{ARCHIVE_WORKSPACE_ID}'
      AND IsDeleted = 0
      AND `Created Date` < date_sub(current_timestamp(), 5)
)

GROUP BY 
    d.`Dataset Id`,
    d.`Workspace Id`
HAVING 
    COUNT(r.`Report Id`) < 1

""")

dataset_rows = df_datasets.collect()
# Kept as a plain list of dataset ids for anything that reads it afterwards.
dataset_list = [row[0] for row in dataset_rows]

# Avoid hanging the notebook forever on a stalled connection.
DATASET_DELETE_TIMEOUT_SECONDS = 60

for row in dataset_rows:
    DATASET_ID = row[0]
    WORKSPACE_ID = row[1]

    # ==== Delete dataset (workspace-scoped endpoint is safest) ====
    url = f"https://api.powerbi.com/v1.0/myorg/groups/{WORKSPACE_ID}/datasets/{DATASET_ID}"
    r = requests.delete(url, headers=headers, timeout=DATASET_DELETE_TIMEOUT_SECONDS)

    if r.status_code in (200, 202, 204):
        print(f"✔ Deleted dataset {DATASET_ID} in workspace {WORKSPACE_ID}")
    else:
        print(f"✖ Delete failed: {r.status_code} {r.text[:300]}")