# Export SalesLT Tables to Retail Data Model Bronze Layer

This notebook dynamically discovers all base tables (views are excluded) in the SalesLT schema of Gaiye-SQL-DB and exports each one to the retail data model bronze layer.

**Prerequisites:**
- Fabric workspace with access to Gaiye-SQL-DB
- Retail data model lakehouse attached to this notebook
- Appropriate permissions for SQL database and lakehouse access

In [None]:
# Import required libraries
import pandas as pd
import pyodbc
from datetime import datetime
import os
import logging

# Configure logging once for the whole notebook: INFO and above, with a
# timestamp and severity in front of every message
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Logger shared by the cells below
logger = logging.getLogger(__name__)

# Start banner: confirms the imports ran and records when the export began
print(
    "✅ Libraries imported successfully",
    f"📅 Export started at: {datetime.now()}",
    sep="\n",
)

In [None]:
# Source database settings
# Note: In Fabric, use the built-in SQL database connectivity
database_name = "Gaiye-SQL-DB"
schema_name = "SalesLT"

# ODBC connection string for the Fabric SQL endpoint, assembled one setting
# per line (adjust as needed for your environment). Authentication is
# Azure AD integrated, so no credentials are embedded here.
connection_string = (
    f"DRIVER={{ODBC Driver 18 for SQL Server}};"
    f"SERVER=tcp:{database_name}.sql.fabric.microsoft.com,1433;"
    f"DATABASE={database_name};"
    "Authentication=ActiveDirectoryIntegrated;"
    "Encrypt=yes;"
    "TrustServerCertificate=no;"
    "Connection Timeout=30;"
)

print(f"🔗 Connecting to database: {database_name}")
print(f"📊 Target schema: {schema_name}")

# Connectivity probe: open a connection and close it straight away.
# A failure is reported but deliberately not re-raised, so the notebook
# keeps running and the user can fix the connection string.
try:
    probe_connection = pyodbc.connect(connection_string)
    print("✅ Database connection successful")
    probe_connection.close()
except Exception as probe_error:
    print(f"❌ Connection failed: {str(probe_error)}")
    print("💡 Note: You may need to adjust the connection string for your Fabric environment")

In [None]:
# Discover all base tables in the configured source schema.
# The schema is bound as a query parameter (pyodbc "?" marker) so discovery
# always targets `schema_name` - the same schema the export step reads from -
# instead of a hard-coded literal.
table_discovery_query = """
SELECT TABLE_NAME
FROM INFORMATION_SCHEMA.TABLES
WHERE TABLE_SCHEMA = ?
AND TABLE_TYPE = 'BASE TABLE'
ORDER BY TABLE_NAME
"""

print(f"🔍 Discovering tables in {schema_name} schema...")

try:
    # Execute table discovery query; the connection is closed even when the
    # query itself fails, so a bad query cannot leak a connection
    conn = pyodbc.connect(connection_string)
    try:
        tables_df = pd.read_sql(table_discovery_query, conn, params=[schema_name])
    finally:
        conn.close()

    # Extract table names
    table_names = tables_df['TABLE_NAME'].tolist()

    print(f"✅ Found {len(table_names)} tables in {schema_name} schema:")
    for i, table in enumerate(table_names, 1):
        print(f"   {i}. {table}")

except Exception as e:
    # Best-effort: report the problem and continue with an empty list so the
    # later cells still run (they will simply export nothing)
    print(f"❌ Error discovering tables: {str(e)}")
    table_names = []

In [None]:
# Define export function
def _quote_sql_identifier(identifier):
    """Return a SQL Server identifier wrapped in brackets, with any ']' doubled."""
    return "[" + str(identifier).replace("]", "]]") + "]"


def export_table_to_bronze(table_name, connection_string, schema_name="SalesLT"):
    """
    Export a single table to the bronze layer as a Parquet file.

    Reads every row of ``schema_name.table_name``, adds three lineage columns
    (``_source_table``, ``_extraction_timestamp``, ``_source_database``) and
    writes the result to ``Files/bronze/<table>/<table>.parquet`` with the
    table name lower-cased.

    Args:
        table_name: Name of the table to export.
        connection_string: ODBC connection string passed to ``pyodbc.connect``.
        schema_name: Schema that owns the table.

    Returns:
        A dict describing the outcome. On success it holds ``table_name``,
        ``status`` ('success'), ``row_count``, ``columns``, ``bronze_path``
        and ``file_size_mb``. Note that ``file_size_mb`` is the DataFrame's
        in-memory size, not the size of the Parquet file on disk. On failure
        it holds ``table_name``, ``status`` ('failed'), ``error`` and
        ``row_count`` (0).

    Any exception is caught, logged and reported through the returned dict,
    so one failing table does not abort a multi-table export.
    """
    try:
        logger.info(f"Starting export for table: {table_name}")

        # Build dynamic query. Identifiers are bracket-quoted so names that
        # contain spaces or reserved words still resolve.
        query = (
            f"SELECT * FROM {_quote_sql_identifier(schema_name)}"
            f".{_quote_sql_identifier(table_name)}"
        )

        # Connect and extract data; always release the connection, even when
        # the read fails
        conn = pyodbc.connect(connection_string)
        try:
            df = pd.read_sql(query, conn)
        finally:
            conn.close()

        # Add metadata
        df['_source_table'] = f"{schema_name}.{table_name}"
        df['_extraction_timestamp'] = datetime.now()
        df['_source_database'] = database_name

        # Define bronze layer path
        # NOTE(review): this is a relative path - confirm that the notebook's
        # working directory maps it onto the attached lakehouse.
        bronze_path = f"Files/bronze/{table_name.lower()}"

        # The Parquet writer does not create missing folders, so make sure
        # the table's folder exists before writing
        os.makedirs(bronze_path, exist_ok=True)

        # Save to bronze layer as Parquet
        df.to_parquet(f"{bronze_path}/{table_name.lower()}.parquet", index=False)

        logger.info(f"✅ Successfully exported {table_name}: {len(df)} rows")

        return {
            'table_name': table_name,
            'status': 'success',
            'row_count': len(df),
            'columns': list(df.columns),
            'bronze_path': bronze_path,
            'file_size_mb': round(df.memory_usage(deep=True).sum() / 1024 / 1024, 2)
        }

    except Exception as e:
        error_msg = f"❌ Error exporting {table_name}: {str(e)}"
        logger.error(error_msg)

        return {
            'table_name': table_name,
            'status': 'failed',
            'error': str(e),
            'row_count': 0
        }

print("📝 Export function defined")

In [None]:
# Run the export for every discovered table, collecting one result dict per
# table and keeping running success/failure totals for the summary cell
table_count = len(table_names)
print(f"🚀 Starting export of {table_count} tables to bronze layer...\n")

export_results = []
successful_exports = 0
failed_exports = 0

for position, table_name in enumerate(table_names, start=1):
    print(f"📊 Processing table {position}/{table_count}: {table_name}")

    # Export one table; failures come back as a result dict, not an exception
    outcome = export_table_to_bronze(table_name, connection_string, schema_name)
    export_results.append(outcome)

    # Tally the outcome and echo a one-line status
    if outcome['status'] != 'success':
        failed_exports += 1
        print(f"   ❌ Export failed: {outcome.get('error', 'Unknown error')}")
    else:
        successful_exports += 1
        print(f"   ✅ {outcome['row_count']} rows exported ({outcome.get('file_size_mb', 0)} MB)")

    print()  # Blank line between tables for readability

print("🎯 Export completed!")
print(f"✅ Successful: {successful_exports} tables")
print(f"❌ Failed: {failed_exports} tables")

In [None]:
# Generate detailed export summary: print a report of the export results and
# persist the same information as a JSON file in the bronze layer
import json

print("📋 EXPORT SUMMARY REPORT")
print("=" * 50)
print(f"Export Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
print(f"Source Database: {database_name}")
print(f"Source Schema: {schema_name}")
print("Destination: Bronze Layer")
print()

# Create summary DataFrame (one row per exported table)
summary_df = pd.DataFrame(export_results)

if not summary_df.empty:
    # Display successful exports
    successful_df = summary_df[summary_df['status'] == 'success']
    if not successful_df.empty:
        print("✅ SUCCESSFUL EXPORTS:")
        print(successful_df[['table_name', 'row_count', 'file_size_mb']].to_string(index=False))
        print()

        total_rows = successful_df['row_count'].sum()
        total_size = successful_df['file_size_mb'].sum()
        print(f"📊 Total rows exported: {total_rows:,}")
        print(f"💾 Total data size: {total_size:.2f} MB")
        print()

    # Display failed exports
    failed_df = summary_df[summary_df['status'] == 'failed']
    if not failed_df.empty:
        print("❌ FAILED EXPORTS:")
        print(failed_df[['table_name', 'error']].to_string(index=False))
        print()

# Build summary report
summary_report = {
    'export_timestamp': datetime.now().isoformat(),
    'source_database': database_name,
    'source_schema': schema_name,
    'total_tables_discovered': len(table_names),
    'successful_exports': successful_exports,
    'failed_exports': failed_exports,
    'export_details': export_results
}

# Save summary as JSON in bronze layer
summary_path = "Files/bronze/_export_summary.json"

# The bronze folder may not exist yet (no tables discovered, or every export
# failed), so create it before writing the report
os.makedirs(os.path.dirname(summary_path), exist_ok=True)

# default=str stringifies values json cannot encode natively
with open(summary_path, 'w', encoding='utf-8') as f:
    json.dump(summary_report, f, indent=2, default=str)

print(f"📄 Export summary saved to: {summary_path}")
print("\n🎉 SalesLT schema export to bronze layer completed!")

In [None]:
# Validation: list what is in the bronze layer folder - sub-folders with their
# Parquet file counts, and any loose files - without raising on errors
print("🔍 VALIDATION: Checking bronze layer contents...")
print("=" * 50)

bronze_base_path = "Files/bronze"

try:
    if not os.path.exists(bronze_base_path):
        print(f"❌ Bronze layer path not found: {bronze_base_path}")
    else:
        # Top-level entries of the bronze layer
        entries = os.listdir(bronze_base_path)
        print(f"📁 Bronze layer contains {len(entries)} items:")

        for entry in sorted(entries):
            entry_path = os.path.join(bronze_base_path, entry)

            if not os.path.isdir(entry_path):
                print(f"   📄 {entry}")
                continue

            # Count the Parquet files directly inside this folder
            parquet_count = sum(
                1 for name in os.listdir(entry_path) if name.endswith('.parquet')
            )
            print(f"   📂 {entry}/ ({parquet_count} parquet files)")

except Exception as validation_error:
    print(f"❌ Error validating bronze layer: {str(validation_error)}")

print("\n✅ Validation completed")

## Next Steps

After running this notebook, you should have:

1. **Exported Data**: All SalesLT tables saved as Parquet files in the bronze layer
2. **Metadata**: Each file includes three lineage columns: `_source_table`, `_extraction_timestamp` and `_source_database`
3. **Summary Report**: JSON file with complete export details
4. **File Organization**: Each table in its own folder within `Files/bronze/`, stored as `<table>/<table>.parquet` with the table name lower-cased

### Recommended Next Actions:

- **Review the data**: Check the bronze layer files to ensure data quality
- **Create silver layer transformations**: Build data pipelines to clean and standardize the data
- **Map to retail model**: Align the SalesLT schema with your retail data model requirements
- **Set up monitoring**: Create alerts for data freshness and quality

### Troubleshooting:

- **Connection issues**: Verify your Fabric workspace has access to the SQL database
- **Permission errors**: Ensure you have read access to SalesLT schema and write access to the lakehouse
- **Large tables**: For very large tables, consider implementing chunked processing
- **Data types**: Some SQL Server data types may need special handling during export