In [None]:
# ========== CODE CELL 1 ==========
# Development helper: re-import the project helper modules so that edits made
# to them on disk are picked up, then rebuild the SharePoint client from the
# freshly loaded code.
import importlib

# Purview helpers: import, reload, then re-bind the names used by the notebook
import custom_libs.purview_utils
importlib.reload(custom_libs.purview_utils)
from custom_libs.purview_utils import loadPurviewAssets, applyPurviewClassifications

# SharePoint helpers: same sequence
import custom_libs.sharepoint_utils
importlib.reload(custom_libs.sharepoint_utils)
from custom_libs.sharepoint_utils import SharePointUtils

# New client instance built from the reloaded class, configured from the .env
# file and authenticated against Microsoft Graph
sharepointClient = SharePointUtils()
sharepointClient.loadEnvFile()
response = sharepointClient.msgraph_auth()

print("‚úÖ Reloaded purview_utils and sharepoint_utils modules")
print("‚úÖ Recreated SharePoint client")

# 🚀 Getting Started

💡 <b>Before running this notebook</b>, ensure you have configured SharePoint and Azure AI Foundry, set up an application for handling API authentication, granted the appropriate roles in Microsoft Purview, and set the required configuration parameters. [Steps listed here.](README.md)

## 1. Setup

### 1.1 Install required libraries

In [None]:
# ========== CODE CELL 5 ==========
!pip install -r requirements.txt

### 1.2 Load libraries

In [None]:
# ========== CODE CELL 7 ==========
import os
# json is used further down to pretty-print file lists and extracted content
import json
from azure.identity import ClientSecretCredential
from pyapacheatlas.core import PurviewClient
from purviewautomation import PurviewCollections, ServicePrincipalAuthentication
from azure.ai.inference import ChatCompletionsClient
from azure.core.credentials import AzureKeyCredential
from pyapacheatlas.core.typedef import ClassificationTypeDef, EntityTypeDef
# Purview custom libraries
from custom_libs.purview_utils import (
    filesystemFileSampleList,
    listFilesystemFiles,
    getAADToken,
    moveCollection,
    estimateTokens,
    unstructuredDataClassification,
    rollupClassifications,
    loadPurviewAssets,
    applyPurviewClassifications
)
# SharePoint custom libraries
from custom_libs.sharepoint_utils import (
    SharePointUtils,
)

### 1.2 Initialize Environment

Before running this notebook, you must configure certain environment variables. We will now use environment variables to store our configuration. This is a more secure practice as it prevents sensitive data from being accidentally committed and pushed to version control systems.

Create a `.env` file in your project root (use the provided `.env.sample` as a template). [Detailed steps here](README.md)

> 📌 **Note**
> Remember not to commit the `.env` file to your version control system. Add it to your `.gitignore` file to prevent it from being tracked.

In [None]:
# ========== CODE CELL 9 ==========
# Create the SharePointUtils client and let it load the .env file, so that the
# settings below can be read from the process environment.
sharepointClient = SharePointUtils()
sharepointClient.loadEnvFile()

# --- Azure OpenAI settings ---
azureOpenAIApiKey = os.environ.get("AZURE_OPENAI_API_KEY")
azureOpenAIDeploymentName = os.environ.get("AZURE_OPENAI_DEPLOYMENT_NAME")
azureOpenAILLMModel = os.environ.get("AZURE_OPENAI_LLM_MODEL")
azureOpenAIApiEndpoint = os.environ.get("AZURE_OPENAI_ENDPOINT")
azureOpenAIApiVersion = os.environ.get("AZURE_OPENAI_API_VERSION")

# --- Microsoft Purview settings ---
purviewAccountName = os.environ.get("PURVIEW_ACCOUNT_NAME")
purviewEndpointUrl = os.environ.get("PURVIEW_ENDPOINT_URL")
purviewTokenUrl = os.environ.get("PURVIEW_TOKEN_URL")

# --- Service principal credentials ---
tenantId = os.environ.get("AZURE_TENANT_ID")
clientId = os.environ.get("AZURE_CLIENT_ID")
clientSecret = os.environ.get("AZURE_CLIENT_SECRET")

# --- SharePoint site ---
siteDomain = os.environ.get("SITE_DOMAIN")
siteName = os.environ.get("SITE_NAME")

You will need to update the values for the cell below to match the characteristics of your environment.

In [None]:
# ========== CODE CELL 11 ==========
# User-tunable settings for the whole notebook.

# When True, the "display" cells print intermediate values
displayVariables = True

# File extensions to look for, and where to look.
# An empty path scans recursively from the root folder
# (filesystemPath may also be a path such as r"SampleFiles").
fileExtensions = ["docx", "pdf", "pptx"]
sharepointPath = ""
filesystemPath = ""

# Characters of each file handed to the Large Language Model (LLM).
# Raised from 800 to 2000 for better context and classification accuracy.
textLength = 2000

# Number of files to sample from the filesystem and SharePoint lists
sampleSize = 0

# Entity types used for assets and as the allowed targets of classifications
entityTypes = [
    "SharepointAccount",
    "SharepointRootFolder",
    "SharepointFolder",
    "SharepointFile",
    "FileSystemRoot",
    "FileSystemFolder",
    "FileSystemFile",
    "DataSet",
]

# Custom classifications to create in Purview; adapt to the organization or project
classifications = [
    "Empty Content",
    "Insurance Claim",
    "Sales Receipt",
    "Insurance Policy",
    "Report",
    "Invoice",
    "PII",
    "Other",
]

# Same list as a single string: one classification per line, each newline-terminated
classificationsStr = "".join(f"{name}\n" for name in classifications)

### 1.3 Configure Execution Parameters

Set which sections of the notebook should execute. You can enable/disable specific demos and cleanup operations.

In [None]:
# ========== CODE CELL 13 ==========
# ===== EXECUTION CONTROL PARAMETERS =====
# Each flag switches one section of the notebook on (True) or off (False).

RUN_SHAREPOINT_DEMO = True   # Section 2: SharePoint Demo
RUN_FILESYSTEM_DEMO = True   # Section 3: File System Demo
RUN_CLEANUP = False          # Section 4: Cleanup

# Echo the chosen configuration
print("Execution Configuration:")
print("  SharePoint Demo: " + ("ENABLED" if RUN_SHAREPOINT_DEMO else "DISABLED"))
print("  File System Demo: " + ("ENABLED" if RUN_FILESYSTEM_DEMO else "DISABLED"))
print("  Cleanup: " + ("ENABLED" if RUN_CLEANUP else "DISABLED"))

In [None]:
# ========== CODE CELL 14 ==========
# Show the loaded settings for a quick sanity check.
if displayVariables:
    print(f"Tenant ID: {tenantId}")
    print(f"Client ID: {clientId}")
    # Never echo the key itself: cell output is stored in the notebook file and
    # would leak the secret when the notebook is shared or committed.
    print(f"Azure OpenAI API Key: {'<set>' if azureOpenAIApiKey else '<not set>'}")
    print(f"Azure OpenAI Endpoint: {azureOpenAIApiEndpoint}")

In [None]:
# ========== CODE CELL 15 ==========
# Stop early when any of the core credentials is missing or empty
if not all((tenantId, clientId, clientSecret, azureOpenAIApiKey)):
    raise ValueError("Azure credentials are not set in the environment variables.")

# Token used for the REST API calls
token = getAADToken(tenantId, clientId, clientSecret, purviewTokenUrl)

# Microsoft Graph authentication for the SharePoint client
response = sharepointClient.msgraph_auth()

# Two credential objects built from the same service principal: one for the
# collections client, one for the Atlas (Purview) client
servicePrincipalAuth = ServicePrincipalAuthentication(
    tenant_id=tenantId, client_id=clientId, client_secret=clientSecret
)
clientCredential = ClientSecretCredential(
    tenant_id=tenantId, client_id=clientId, client_secret=clientSecret
)

# Purview administration clients
purviewClient = PurviewClient(account_name=purviewAccountName, authentication=clientCredential)
collectionClient = PurviewCollections(purview_account_name=purviewAccountName, auth=servicePrincipalAuth)

# Azure AI Foundry chat client; temperature is fixed to 0 at construction
llmClient = ChatCompletionsClient(
    endpoint=azureOpenAIApiEndpoint,
    credential=AzureKeyCredential(azureOpenAIApiKey),
    temperature=0,
)

### 1.4 Create Purview asset dependencies

Creates entity type definitions and classifications required by the Purview clients to assign classifications to assets discovered.

In [None]:
# ========== CODE CELL 17 - RELATIONSHIP TYPE DEFINITIONS ==========
# Registers the custom Purview types used by this notebook, in dependency order:
#   1. entity types        (names taken from entityTypes)
#   2. relationship types  (they reference the entity types, so these go second)
#   3. classifications     (names taken from classifications)
from pyapacheatlas.core.typedef import AtlasAttributeDef, RelationshipTypeDef, AtlasRelationshipEndDef, Cardinality


def _compositionRelationship(name, parentType, parentAttr, childType, childAttr):
    """Build one COMPOSITION relationship type definition.

    The first end (parentType / parentAttr) is declared as the container with
    SET cardinality; the second end (childType / childAttr) is declared as a
    non-container with SINGLE cardinality.
    """
    parentEnd = AtlasRelationshipEndDef(
        typeName=parentType, name=parentAttr, isContainer=True, cardinality=Cardinality.SET
    )
    childEnd = AtlasRelationshipEndDef(
        typeName=childType, name=childAttr, isContainer=False, cardinality=Cardinality.SINGLE
    )
    return RelationshipTypeDef(
        name=name,
        relationshipCategory='COMPOSITION',
        endDef1=parentEnd,
        endDef2=childEnd
    )


# Step 1: entity types. Every custom type derives from DataSet; DataSet itself
# is built in and is therefore never uploaded or modified.
print("Creating entity type definitions...")
for entityName in entityTypes:
    if entityName != "DataSet":
        edef = EntityTypeDef(name=entityName, superTypes=['DataSet'])
        results = purviewClient.upload_typedefs(entityDefs=[edef], force_update=True)
        print(f"‚úÖ Created entity type: {entityName}")

# Step 2: relationship types, described as
# (relationship name, parent type, parent attribute, child type, child attribute)
print("\nCreating relationship type definitions...")

sharepoint_relationships = [
    _compositionRelationship(*spec)
    for spec in (
        ('sharepoint_account_root', 'SharepointAccount', 'rootFolders', 'SharepointRootFolder', 'account'),
        ('sharepoint_root_folder', 'SharepointRootFolder', 'folders', 'SharepointFolder', 'rootFolder'),
        ('sharepoint_folder_subfolder', 'SharepointFolder', 'subfolders', 'SharepointFolder', 'parentFolder'),
        ('sharepoint_folder_files', 'SharepointFolder', 'files', 'SharepointFile', 'folder'),
        ('sharepoint_root_files', 'SharepointRootFolder', 'files', 'SharepointFile', 'rootFolder'),
    )
]

filesystem_relationships = [
    _compositionRelationship(*spec)
    for spec in (
        ('filesystem_root_folder', 'FileSystemRoot', 'folders', 'FileSystemFolder', 'root'),
        ('filesystem_folder_subfolder', 'FileSystemFolder', 'subfolders', 'FileSystemFolder', 'parentFolder'),
        ('filesystem_folder_files', 'FileSystemFolder', 'files', 'FileSystemFile', 'folder'),
        ('filesystem_root_files', 'FileSystemRoot', 'files', 'FileSystemFile', 'root'),
    )
]

# All relationship definitions go up in a single request
all_relationships = sharepoint_relationships + filesystem_relationships
results = purviewClient.upload_typedefs(relationshipDefs=all_relationships, force_update=True)
print(f"‚úÖ Created {len(all_relationships)} relationship type definitions")

# Step 3: custom classifications, one upload per classification.
# entityTypes restricts which asset types each classification may be attached to.
for classification in classifications:
    cdef = ClassificationTypeDef(name=classification, entityTypes=entityTypes)
    results = purviewClient.upload_typedefs(classificationDefs=[cdef], force_update=True)

### 1.5 Create custom collections

Creates multiple custom collections under the parent collection passed as `start_collection` (the domain).


In [None]:
# The parent collection named by start_collection MUST already exist before
# several collections can be created beneath it.
response = collectionClient.create_collections(
    start_collection=purviewAccountName,
    collection_names=[
        'Unstructured/SharePoint',
        'Unstructured/FileSystem',
    ],
)

### 1.6 Capture Sampling Size

This will help to determine the number of files that will be analyzed for classification purposes.

> 📌 **Note:**
> The sample size is currently a fixed number of files, but it could be changed to a percentage of the total number of files found during the scan.

# Ask how many documents to analyze; anything that is not a plain whole number means 0.
# str.isdecimal() is used instead of str.isnumeric() because isnumeric() also accepts
# characters such as "²" or "½", which int() cannot convert.
sampleSizeInput = input("Enter how many documents to analyze: ").strip()
sampleSize = int(sampleSizeInput) if sampleSizeInput.isdecimal() else 0
print(f"\n{sampleSize} documents will be analyzed from the list of documents found.")

## 2. SharePoint Demo

⚙️ **Controlled by parameter:** `RUN_SHAREPOINT_DEMO`

### 2.1 Scan SharePoint Site

💡 Skip this cell if `RUN_SHAREPOINT_DEMO = False`

In [None]:
# List the files of the SharePoint site whose extension is in fileExtensions.
# spFileList is always left as a list, also when the demo is skipped.
if not RUN_SHAREPOINT_DEMO:
    print("‚è≠Ô∏è  Skipping SharePoint Demo - RUN_SHAREPOINT_DEMO is False")
    spFileList = []
else:
    spFileList = sharepointClient.listSharepointFiles(
        site_domain=siteDomain,
        site_name=siteName,
        file_formats=fileExtensions,
        # An empty path is passed on as None
        folder_path=sharepointPath if sharepointPath else None,
        # Optional: only files modified N minutes ago
        # minutes_ago=60,
    )

    if spFileList is not None:
        print(f"{len(spFileList)} files found matching the patterns {fileExtensions}: \n")
    else:
        # The helper returned None (API error or no files found)
        print("‚ö†Ô∏è  No files found or SharePoint API error occurred")
        spFileList = []

In [None]:
# Pretty-print the SharePoint file list when variable display is enabled.
# Plain truthiness is used for displayVariables, as in every other display cell.
if RUN_SHAREPOINT_DEMO and displayVariables:
    print(json.dumps(spFileList, indent=2))

### 2.2 Generate file subset

In [None]:
if RUN_SHAREPOINT_DEMO:
    # Create a subset of the spFileList based on the number specified by sampleSize. If no subset is provided, the entire list will be used.
    if sampleSize == 0 or sampleSize > len(spFileList):
            sampleSize = len(spFileList)
    # Create a subset of the SharePoint file list
    spFileSubset = sharepointClient.sharepointFileSampleList(spFileList,sampleSize)

In [None]:
if RUN_SHAREPOINT_DEMO and displayVariables:
    print(f"\nSubset of SharePoint files to be analyzed: {sampleSize} files\n")
    for file in spFileSubset:
        print(f"{file}")

### 2.3 Extract file contents

In [None]:
if RUN_SHAREPOINT_DEMO:
    # Extract contents and file information for every file in spFileSubset.
    # The site is identified by siteDomain / siteName, the same values the scan
    # cell passes to listSharepointFiles, instead of re-reading os.environ here.
    spFileContent = sharepointClient.getSharepointFileContent(
        site_domain=siteDomain,
        site_name=siteName,
        folder_path=sharepointPath,
        file_names=spFileSubset
        # Files modified N minutes ago
        # minutes_ago=60,
    )

In [None]:
# Pretty-print the extracted SharePoint content when variable display is enabled
if RUN_SHAREPOINT_DEMO:
    if displayVariables:
        print(json.dumps(spFileContent, indent=2))

### 2.4 Analyze File Contents with LLM

### Estimate the number of tokens that will be used by LLM model, prior to processing the documents

In [None]:
if RUN_SHAREPOINT_DEMO:
    # estimateTokens receives the extracted content, the per-file character
    # limit, the classification list and the model name
    tokens = estimateTokens(
        spFileContent,
        textLength,
        classificationsStr,
        azureOpenAILLMModel,
    )
    print(f"Estimated Number of Tokens: {tokens}")

### 2.5 Classify document contents using LLM

In [None]:
if RUN_SHAREPOINT_DEMO:
    # Have the Large Language Model determine the applicable classifications
    # for the SharePoint content; the result replaces spFileContent.
    spFileContent = unstructuredDataClassification(
        spFileContent,
        textLength,
        llmClient,
        azureOpenAIDeploymentName,
        classificationsStr,
    )

### 2.6 Organize and Rollup Classifications

In [None]:
if RUN_SHAREPOINT_DEMO:
    # Gather the document classifications identified for the SharePoint files
    spClassifications = rollupClassifications(
        spFileContent
    )

In [None]:
# Show the rolled-up SharePoint classifications when variable display is enabled
if RUN_SHAREPOINT_DEMO:
    if displayVariables:
        print(f"\nClassifications for SharePoint files: {spClassifications}")

### 2.7 Ingest assets into Purview via Atlas API

In [None]:
# Load the SharePoint assets into Purview. spGuids always ends up as a dict
# with the keys "all" and "file", also when the demo is skipped.
if not RUN_SHAREPOINT_DEMO:
    spGuids = {"all": [], "file": []}
else:
    spGuids = loadPurviewAssets(purviewClient, spFileContent)
    if isinstance(spGuids, list):
        # Legacy return shape: a plain list is used for both keys
        spGuids = {"all": spGuids, "file": spGuids}

In [None]:
# Show the GUIDs recorded for the SharePoint assets when variable display is enabled
if RUN_SHAREPOINT_DEMO:
    if displayVariables:
        print(f"SharePoint GUIDs (all): {spGuids.get('all', [])}")
        print(f"SharePoint File GUIDs: {spGuids.get('file', [])}")

### 2.8 Apply classifications to assets

In [None]:
if RUN_SHAREPOINT_DEMO:
    # Apply the rolled-up classifications to the SharePoint file GUIDs
    result = applyPurviewClassifications(
        purviewClient,
        spGuids.get('file', []),
        spClassifications,
    )

### 2.9 Move assets to their final collection

In [None]:
if RUN_SHAREPOINT_DEMO:
    # Move every SharePoint asset from the default (root) collection to collectionName
    collectionName = 'SharePoint'
    output = moveCollection(
        collectionName,
        purviewEndpointUrl,
        token,
        spGuids.get('all', []),
    )

## 3. File System Demo

### 3.1 Scan Filesystem

In [None]:
# List the filesystem files whose extension is in fileExtensions.
# fsFileList is left as an empty list when the demo is skipped.
if not RUN_FILESYSTEM_DEMO:
    print("‚è≠Ô∏è  Skipping FileSystem Demo - RUN_FILESYSTEM_DEMO is False")
    fsFileList = []
else:
    fsFileList = listFilesystemFiles(filesystemPath, fileExtensions)
    print(f"{len(fsFileList)} files found matching the patterns {fileExtensions}")

In [None]:
# Print one filesystem entry per line when variable display is enabled
if RUN_FILESYSTEM_DEMO:
    if displayVariables:
        for file in fsFileList:
            print(f"{file}")

### 3.2 Generate file subset and extract contents

In [None]:
if RUN_FILESYSTEM_DEMO:
    # Build the subset of fsFileList to analyze and extract its contents and metadata.
    # sampleSize == 0, or a value larger than the list, means "use every file found".
    # The resolved value is kept in fsSampleSize so that the shared sampleSize
    # setting is not overwritten and re-running this cell gives the same result.
    if sampleSize == 0 or sampleSize > len(fsFileList):
        fsSampleSize = len(fsFileList)
    else:
        fsSampleSize = sampleSize

    fsFileContent = filesystemFileSampleList(fsFileList, fsSampleSize, filesystemPath)
else:
    fsFileContent = []

In [None]:
# Show the extracted filesystem content when variable display is enabled.
# A bare expression nested inside an "if" is not rendered by the notebook
# (only a trailing top-level expression is), so the value is printed explicitly,
# in the same way as the SharePoint content. default=str keeps the dump from
# failing on values json cannot serialize natively.
if RUN_FILESYSTEM_DEMO and displayVariables:
    print(json.dumps(fsFileContent, indent=2, default=str))

### 3.3 Estimate number of tokens to be used by LLM

In [None]:
if RUN_FILESYSTEM_DEMO:
    # estimateTokens receives the extracted content, the per-file character
    # limit, the classification list and the model name
    tokens = estimateTokens(
        fsFileContent,
        textLength,
        classificationsStr,
        azureOpenAILLMModel,
    )
    print(f"Estimated Number of Tokens: {tokens}")

### 3.4 Classify document contents using LLM

In [None]:
if RUN_FILESYSTEM_DEMO:
    # Have the Large Language Model determine the applicable classifications
    # for the filesystem content; the result replaces fsFileContent.
    fsFileContent = unstructuredDataClassification(
        fsFileContent,
        textLength,
        llmClient,
        azureOpenAIDeploymentName,
        classificationsStr,
    )

### 3.5 Organize and Rollup Classifications

In [None]:
# Gather the document classifications identified for the FileSystem files;
# an empty list stands in when the demo is skipped.
if not RUN_FILESYSTEM_DEMO:
    fsClassifications = []
else:
    fsClassifications = rollupClassifications(fsFileContent)

In [None]:
# Show the rolled-up FileSystem classifications when variable display is enabled
if RUN_FILESYSTEM_DEMO:
    if displayVariables:
        print(f"\nClassifications for FileSystem files: {fsClassifications}")

### 3.6 Ingest assets into Purview via Atlas API

In [None]:
# Load the FileSystem assets into Purview. fsGuids always ends up as a dict
# with the keys "all" and "file", also when the demo is skipped.
if not RUN_FILESYSTEM_DEMO:
    fsGuids = {"all": [], "file": []}
else:
    fsGuids = loadPurviewAssets(purviewClient, fsFileContent)
    if isinstance(fsGuids, list):
        # Backward compatibility: a plain list is used for both keys
        fsGuids = {"all": fsGuids, "file": fsGuids}

In [None]:
# Show the GUIDs recorded for the FileSystem assets when variable display is enabled
if RUN_FILESYSTEM_DEMO:
    if displayVariables:
        print(f"\nFileSystem GUIDs (all): {fsGuids.get('all', [])}")
        print(f"FileSystem File GUIDs: {fsGuids.get('file', [])}")

### 3.7 Apply classifications to assets

In [None]:
if RUN_FILESYSTEM_DEMO:
    # Apply the rolled-up classifications to the FileSystem file GUIDs
    result = applyPurviewClassifications(
        purviewClient,
        fsGuids.get('file', []),
        fsClassifications,
    )

### 3.8 Move assets to their final collection

In [None]:
if RUN_FILESYSTEM_DEMO:
    # Move every FileSystem asset from the default (root) collection to collectionName
    collectionName = 'FileSystem'
    output = moveCollection(
        collectionName,
        purviewEndpointUrl,
        token,
        fsGuids.get('all', []),
    )

## 4. Cleanup section


In [None]:
import time

# Grace period, in seconds, between the end of the demos and the cleanup
wait_time = 300  # seconds

# The countdown only makes sense when cleanup is going to run; without this
# guard a full run would wait five minutes even with cleanup disabled.
if RUN_CLEANUP:
    print("‚è≥ Waiting before cleanup...")
    print("This pause allows you to review the assets in Purview before they are deleted.")
    print("Press Ctrl+C to cancel cleanup, or wait for the countdown to complete.\n")

    try:
        for remaining in range(wait_time, 0, -1):
            # '\r' rewrites the same output line on every tick
            print(f"Cleanup will begin in {remaining} seconds...", end='\r')
            time.sleep(1)
        print("\n‚úÖ Proceeding with cleanup...")
    except KeyboardInterrupt:
        print("\n\n‚ö†Ô∏è Cleanup cancelled by user.")
        # Cancelling must switch cleanup OFF so that the cells below skip it
        RUN_CLEANUP = False

### 4.1 Delete assets and collections

You can delete individual assets using their respective GUIDs or you can leverage the collectionClient to delete collections recursively.

In [None]:
# Removes everything this notebook created in Purview, in dependency order:
# assets, collections, relationship types, classifications, entity types.
# Each step is best-effort: a failure is reported and the cleanup carries on.
if RUN_CLEANUP:
    import time

    print("üóëÔ∏è  Step 1: Querying and deleting all custom type assets...")
    deleted_count = 0

    # First delete the assets created in this session, by their recorded GUIDs
    session_guids = [*spGuids.get('all', []), *fsGuids.get('all', [])]
    for guid in session_guids:
        try:
            response = purviewClient.delete_entity(guid=guid)
            deleted_count += 1
        except Exception as e:
            print(f"  ‚ö†Ô∏è  Could not delete {guid}: {e}")

    # Then search for leftovers of each custom type. The list is derived from
    # entityTypes; the built-in DataSet type is excluded so that unrelated
    # assets are never searched for deletion.
    for entityType in [name for name in entityTypes if name != "DataSet"]:
        try:
            search_results = purviewClient.search_entities(f"typeName:{entityType}")
            # Accept both result shapes: a dict carrying the hits under 'value',
            # or an iterable of hits (a generator must not be probed with "in",
            # which would consume it). The hits are materialized before deleting
            # so that paging is not disturbed by the deletions.
            if isinstance(search_results, dict):
                matches = search_results.get('value', [])
            else:
                matches = list(search_results or [])
            for entity in matches:
                # The query is a keyword search: skip hits that report another type.
                # NOTE(review): assumes hits carry an 'entityType' field - confirm.
                if entity.get('entityType', entityType) != entityType:
                    continue
                try:
                    purviewClient.delete_entity(guid=entity['id'])
                    deleted_count += 1
                    print(f"  üóëÔ∏è  Deleted {entityType}: {entity.get('name', entity['id'])}")
                except Exception as e:
                    print(f"  ‚ö†Ô∏è  Could not delete {entity['id']}: {e}")
        except Exception as e:
            print(f"  ‚ö†Ô∏è  Could not query {entityType}: {e}")

    print(f"  ‚úÖ Deleted {deleted_count} assets")

    print("\nüóëÔ∏è  Step 2: Deleting collections...")
    try:
        # Child collections (and their assets) first, then the parent itself
        collectionClient.delete_collections_recursively("Unstructured", delete_assets=True)
        collectionClient.delete_collections("Unstructured")
        print("  ‚úÖ Collections deleted")
    except Exception as e:
        print(f"  ‚ö†Ô∏è  Could not delete collections: {e}")

    print("\n‚è≥ Waiting 15 seconds for asset deletion to propagate...")
    time.sleep(15)

    print("\nüóëÔ∏è  Step 3: Deleting relationship type definitions...")
    relationship_types = [
        'sharepoint_account_root', 'sharepoint_root_folder', 'sharepoint_folder_subfolder',
        'sharepoint_folder_files', 'sharepoint_root_files',
        'filesystem_root_folder', 'filesystem_folder_subfolder',
        'filesystem_folder_files', 'filesystem_root_files'
    ]
    for rel_type in relationship_types:
        try:
            purviewClient.delete_type(rel_type)
            print(f"  ‚úÖ Deleted relationship type: {rel_type}")
        except Exception as e:
            print(f"  ‚ö†Ô∏è  Could not delete {rel_type}: {e}")

    print("\nüóëÔ∏è  Step 4: Deleting custom classifications...")
    for classification in classifications:
        try:
            purviewClient.delete_type(classification)
            print(f"  ‚úÖ Deleted classification: {classification}")
        except Exception as e:
            print(f"  ‚ö†Ô∏è  Could not delete {classification}: {e}")

    print("\nüóëÔ∏è  Step 5: Deleting custom entity types...")
    for entityName in entityTypes:
        # DataSet is built in and was never created by this notebook
        if entityName == "DataSet":
            continue
        try:
            edef = EntityTypeDef(
                name = entityName,
                superTypes= ['DataSet']
            )
            results = purviewClient.delete_typedefs(
                entityDefs=[edef],
                force_update=True
            )
            print(f"  ‚úÖ Deleted entity type: {entityName}")
        except Exception as e:
            print(f"  ‚ö†Ô∏è  Could not delete {entityName}: {e}")

    print("\n‚úÖ Cleanup complete!")
else:
    print("‚è≠Ô∏è  Skipping Cleanup - RUN_CLEANUP is False")

In [None]:
if RUN_CLEANUP:
    # Delete all Jupyter notebook variables
    %reset -f
else:
    print("‚è≠Ô∏è  Skipping variable reset - RUN_CLEANUP is False")