In [None]:
class ADLSConnector:
    """
    Reusable helper for mounting an ADLS (abfss://) container in Databricks.

    Typical flow: construct the connector, call setup_connection() to build
    the OAuth configuration, then mount(). All storage and secret access goes
    through the global ``dbutils`` object, which this class does not import
    (it is expected to be provided by the notebook runtime).
    """

    def __init__(self, environment='nonprod'):
        """
        Initialize the ADLS connector

        Args:
            environment (str): 'prod' or 'nonprod' to determine which config to use.
                The value is used as the prefix of the secret names that
                setup_connection() looks up.
        """
        self.environment = environment
        self.config_dict = None    # OAuth settings, filled in by setup_connection()
        self.source_path = None    # abfss:// URI of the container/subfolder
        self.mount_point = None    # DBFS mount point, e.g. "/mnt/<mount_name>"
        self.is_mounted = False    # set by mount() / cleared by unmount()

    def setup_connection(self,
                        storage_account_name,
                        container_name,
                        spn_scope,  # spn_scope or secret_scope
                        mount_name,
                        subfolder_path=""):   # subfolder path is optional
        """
        Set up the ADLS connection configuration

        Args:
            storage_account_name (str): Name of your Azure storage account
            container_name (str): Name of the container in storage account
            spn_scope (str): Databricks secret scope containing credentials
            mount_name (str): Local name for the mount point
            subfolder_path (str): Optional subfolder path within container

        Raises:
            Exception: If any of the three secrets cannot be read from the scope.
        """

        # Get credentials from Databricks secrets
        try:
            client_id = dbutils.secrets.get(spn_scope, f"{self.environment}-client-id")
            client_secret = dbutils.secrets.get(spn_scope, f"{self.environment}-client-secret")
            tenant_id = dbutils.secrets.get(spn_scope, "tenant-id")
        except Exception as e:
            # Chain the cause so the original traceback is not lost.
            raise Exception(f"Failed to retrieve secrets from scope '{spn_scope}': {str(e)}") from e

        # Build Azure storage path. A leading "/" on the subfolder would
        # otherwise produce "...windows.net//sub", so it is stripped.
        subfolder_path = (subfolder_path or "").lstrip("/")
        self.source_path = f"abfss://{container_name}@{storage_account_name}.dfs.core.windows.net/{subfolder_path}"

        # Set local mount point
        self.mount_point = f"/mnt/{mount_name}"

        # Create OAuth configuration
        self.config_dict = {
            "fs.azure.account.auth.type": "OAuth",
            "fs.azure.account.oauth.provider.type": "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider",
            "fs.azure.account.oauth2.client.id": client_id,
            "fs.azure.account.oauth2.client.secret": client_secret,
            "fs.azure.account.oauth2.client.endpoint": f"https://login.microsoftonline.com/{tenant_id}/oauth2/token"
        }

        print(f"Done! Configuration set up for {self.environment} environment")
        print(f"  Source: {self.source_path}")
        print(f"  Mount: {self.mount_point}")

    def mount(self):
        """
        Mount the ADLS storage to Databricks file system

        A mount point that is reported as already mounted is treated as
        success; any other failure is re-raised.

        Raises:
            Exception: If setup_connection() has not been called, or if the
                mount fails for a reason other than "already mounted".
        """
        if not self.config_dict:
            raise Exception("Configuration not set up. Call setup_connection() first.")

        try:
            dbutils.fs.mount(
                source=self.source_path,
                mount_point=self.mount_point,
                extra_configs=self.config_dict
            )
            self.is_mounted = True
            print(f"Done! Successfully mounted {self.source_path} at {self.mount_point}")

        except Exception as e:
            # The only signal available here is the error text.
            if "already mounted" in str(e).lower():
                self.is_mounted = True
                print(f" {self.mount_point} already mounted")
            else:
                raise Exception(f"Failed to mount ADLS: {str(e)}") from e

    def unmount(self):
        """
        Unmount the ADLS storage

        Best effort: a failing unmount is printed, not raised, and leaves
        is_mounted unchanged.

        Raises:
            Exception: If no mount point has been configured.
        """
        if not self.mount_point:
            raise Exception("No mount point configured")

        try:
            dbutils.fs.unmount(self.mount_point)
            self.is_mounted = False
            print(f"Done! Successfully unmounted {self.mount_point}")

        except Exception as e:
            print(f"Could not unmount! , {self.mount_point}: {str(e)}")

    def get_file_path(self, relative_path):
        """
        Get the full mounted path for a file

        The result uses the "/dbfs" prefix, i.e. the form used with local
        file APIs such as open().

        Args:
            relative_path (str): Path relative to mount point. A leading "/"
                is ignored so the result always stays under the mount point.

        Returns:
            str: Full path to the file

        Raises:
            Exception: If the storage has not been mounted.
        """
        if not self.is_mounted:
            raise Exception("Storage not mounted. Call mount() first.")

        from pathlib import PurePosixPath

        relative = PurePosixPath(relative_path)
        # Joining an absolute path would discard the mount prefix entirely
        # (Path("/a", "/b") == "/b"), so make it relative first.
        if relative.is_absolute():
            relative = relative.relative_to(relative.anchor)
        return str(PurePosixPath(f"/dbfs{self.mount_point}") / relative)

    def list_files(self, subfolder=""):
        """
        List files in the mounted storage

        Args:
            subfolder (str): Optional subfolder to list

        Returns:
            list: List of file information, as returned by dbutils.fs.ls

        Raises:
            Exception: If the storage has not been mounted.
        """
        if not self.is_mounted:
            raise Exception("Storage not mounted. Call mount() first.")

        # Strip leading slashes so the joined path never contains "//".
        subfolder = subfolder.lstrip("/") if subfolder else ""
        path = f"{self.mount_point}/{subfolder}" if subfolder else self.mount_point
        return dbutils.fs.ls(path)

    def check_connection(self):
        """
        Test the connection by listing the root directory

        Returns:
            bool: True if the listing succeeded, False otherwise (the error
                is printed, including the "not mounted" case).
        """
        try:
            files = self.list_files()
            print(f"Done! Connection successful! Found {len(files)} items in root directory")
            return True
        except Exception as e:
            print(f"Connection failed: {str(e)}")
            return False

# Helper function for quick setup
def create_adls_connection(environment, storage_account, container, spn_scope, mount_name, subfolder=""):
    """
    Build an ADLSConnector, configure it and mount it in one call.

    Args:
        environment: Passed to the ADLSConnector constructor
        storage_account: Azure storage account name
        container: Container name inside the storage account
        spn_scope: Databricks secret scope holding the credentials
        mount_name: Name used for the mount point
        subfolder: Optional subfolder path within the container

    Returns:
        ADLSConnector: Configured and mounted connector
    """
    adls_connector = ADLSConnector(environment)
    adls_connector.setup_connection(
        storage_account_name=storage_account,
        container_name=container,
        spn_scope=spn_scope,
        mount_name=mount_name,
        subfolder_path=subfolder,
    )
    adls_connector.mount()
    return adls_connector

In [None]:
# Create connector
adls = ADLSConnector(environment='prod')

# Set up connection
# NOTE: the parameter is named `spn_scope` (the Databricks secret scope);
# passing `secret_scope=` raises TypeError.
adls.setup_connection(
    storage_account_name='myadls',
    container_name='analytics-data',
    spn_scope='my-secrets',
    mount_name='analytics_prod',
    subfolder_path='reports/2024'
)

# Mount the storage
adls.mount()

# Test connection
adls.check_connection()

# Use the mounted storage
# get_file_path() returns the "/dbfs/mnt/..." form, which is for local file
# APIs such as open().
file_path = adls.get_file_path('monthly_report.csv')
# Spark resolves paths against DBFS itself, so address the mount point
# directly instead of passing the "/dbfs"-prefixed local path.
df = spark.read.csv(f"dbfs:{adls.mount_point}/monthly_report.csv")

In [None]:
# One-liner setup
# NOTE: the helper's parameter is `spn_scope`; `secret_scope=` is not an
# accepted keyword and raises TypeError.
adls = create_adls_connection(
    environment='nonprod',
    storage_account='myadls',
    container='analytics-data',
    spn_scope='my-secrets',
    mount_name='analytics_dev'
)

# Use it immediately
files = adls.list_files()

Print File

In [None]:
def print_text_file(adls_connector, text_file_path):
    """
    Read a text file through the connector, print it and return its text.

    Args:
        adls_connector: Object whose get_file_path() resolves the file
        text_file_path: Path of the file relative to the mount point

    Returns:
        The file contents as a string, or None if anything failed (the
        error is printed rather than raised).
    """
    try:
        # Resolve the relative path to one that open() can read
        local_path = adls_connector.get_file_path(text_file_path)

        with open(local_path, 'r') as handle:
            text = handle.read()

        # Print the contents framed by two divider lines
        divider = "-" * 50
        print(f"Contents of {text_file_path}:")
        print(divider)
        print(text)
        print(divider)

        return text

    except Exception as err:
        print(f"Error reading text file: {str(err)}")
        return None

# Example: mount the nonprod storage, then print a log file from it
adls = create_adls_connection(
    environment='nonprod',
    storage_account='mystorageaccount',
    container='data',
    spn_scope='my-secrets',
    mount_name='data_mount',
)
content = print_text_file(adls_connector=adls, text_file_path="logs/process_log.txt")

CSV File 

In [None]:
def sum_csv_column(adls_connector, csv_file_path, column_index=2):
    """
    Sum a specific column in a CSV file using Spark

    Args:
        adls_connector: Your ADLS connector instance
        csv_file_path: Relative path to CSV file
        column_index: Column index (0-based, so 2 = 3rd column)

    Returns:
        The column total as returned by Spark, or None if anything failed
        (the error is printed rather than raised).
    """
    try:
        # Get full file path
        local_path = adls_connector.get_file_path(csv_file_path)

        # get_file_path() returns the "/dbfs/..." form meant for local file
        # APIs. Spark resolves paths against DBFS itself, so translate the
        # prefix into the dbfs: scheme before handing the path to Spark.
        dbfs_prefix = "/dbfs/"
        if local_path.startswith(dbfs_prefix):
            spark_path = "dbfs:/" + local_path[len(dbfs_prefix):]
        else:
            spark_path = local_path

        # Using Spark (better for large files)
        df = spark.read.csv(spark_path, header=True, inferSchema=True)
        column_name = df.columns[column_index]

        # The dict form of agg() needs no pyspark.sql.functions import; the
        # previous sum(col(...)) used an undefined `col` and Python's builtin
        # `sum`, so it always ended up in the except branch.
        total = df.agg({column_name: "sum"}).collect()[0][0]
        print(f"Sum of column '{column_name}' (column {column_index + 1}): {total}")

        return total

    except Exception as e:
        print(f"Error reading CSV: {str(e)}")
        return None

# Example: mount the nonprod storage, then total the 3rd column of a sales CSV
adls = create_adls_connection(
    environment='nonprod',
    storage_account='mystorageaccount',
    container='data',
    spn_scope='my-secrets',
    mount_name='data_mount',
)
total = sum_csv_column(adls_connector=adls, csv_file_path="sales/monthly_sales.csv", column_index=2)