# Microsoft Fabric SQL Database Connection Troubleshooting

## Overview
This notebook helps diagnose and resolve connection issues with **Gaiye-SQL-DB** in Microsoft Fabric, then demonstrates the complete retail data pipeline implementation.

### What This Notebook Does:
1. **🔍 Diagnose Connection Issues**: Test various connection methods and identify problems
2. **🔧 Fix Common Problems**: Provide solutions for typical Fabric SQL connectivity issues  
3. **📊 Validate Data Access**: Verify you can access SalesLT schema tables
4. **🚀 Run Complete Pipeline**: Execute Bronze-Silver-Gold medallion architecture
5. **✅ Export Results**: Save data in multiple formats for Fabric deployment

### Prerequisites:
- Microsoft Fabric workspace access
- Gaiye-SQL-DB permissions (read access to SalesLT schema)
- Retail data model lakehouse attached to this notebook
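- Python libraries: pandas, pyodbc, numpy, plus a Parquet engine such as pyarrow (needed for the Parquet export)
- Microsoft ODBC Driver 18 for SQL Server (used by every connection string in this notebook)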

---

**🚨 Start Here If:**
- You can't see Gaiye-SQL-DB in your workspace
- You are getting connection timeout errors
- You see authentication failures when accessing the SQL database
- You need to verify SalesLT schema access

In [None]:
# Import required libraries and setup
import pandas as pd
import pyodbc
import numpy as np
from datetime import datetime, timedelta
import json
import sys
import os
import warnings
warnings.filterwarnings('ignore')

# The packaged pipeline class is optional: report whether it could be imported
# and carry on either way.
try:
    from fabric_retail_pipeline import FabricRetailDataPipeline
except ImportError as import_error:
    print(f"⚠️ Could not import FabricRetailDataPipeline: {import_error}")
    print("📝 Note: The pipeline will be created inline if import fails")
else:
    print("✅ Successfully imported FabricRetailDataPipeline")

# Report interpreter / library versions and the time of this run
print(
    f"🐍 Python version: {sys.version}",
    f"🐼 Pandas version: {pd.__version__}",
    f"📅 Current timestamp: {datetime.now()}",
    sep="\n",
)

# FABRIC_ENV records whether the `notebookutils` module is importable, which
# this notebook uses as its signal for "running inside Microsoft Fabric".
try:
    import notebookutils
except ImportError:
    FABRIC_ENV = False
    print("⚠️ Not detected as Fabric environment - using local setup")
else:
    FABRIC_ENV = True
    print("✅ Running in Microsoft Fabric environment")

In [None]:
# SQL Server Connection Configuration
print("🔧 STEP 1: Configure SQL Server Connection Parameters")
print("=" * 60)

# Target database and schema; the server host name is derived from the
# database name.
DATABASE_NAME = "Gaiye-SQL-DB"
SCHEMA_NAME = "SalesLT"
SERVER_NAME = f"{DATABASE_NAME}.sql.fabric.microsoft.com"

# Candidate ODBC connection strings keyed by a short method name. Each value
# is assembled from adjacent string literals, one connection attribute per
# line, so the differences between the methods are easy to see.
CONNECTION_STRINGS = {
    "fabric_integrated": (
        "DRIVER={ODBC Driver 18 for SQL Server};"
        f"SERVER=tcp:{SERVER_NAME},1433;"
        f"DATABASE={DATABASE_NAME};"
        "Authentication=ActiveDirectoryIntegrated;"
        "Encrypt=yes;"
        "TrustServerCertificate=no;"
        "Connection Timeout=30;"
    ),
    "fabric_default": (
        "DRIVER={ODBC Driver 18 for SQL Server};"
        f"SERVER=tcp:{SERVER_NAME},1433;"
        f"DATABASE={DATABASE_NAME};"
        "Authentication=ActiveDirectoryDefault;"
        "Encrypt=yes;"
        "TrustServerCertificate=no;"
        "Connection Timeout=30;"
    ),
    "fabric_basic": (
        "DRIVER={ODBC Driver 18 for SQL Server};"
        f"SERVER={SERVER_NAME};"
        f"DATABASE={DATABASE_NAME};"
        "Trusted_Connection=yes;"
        "Encrypt=yes;"
        "Connection Timeout=30;"
    ),
    "local_test": (
        "DRIVER={ODBC Driver 18 for SQL Server};"
        f"SERVER=tcp:{SERVER_NAME},1433;"
        f"DATABASE={DATABASE_NAME};"
        "Encrypt=yes;"
        "Connection Timeout=60;"
    ),
}

# Echo the configuration that the following cells will use
print(
    f"📊 Target Database: {DATABASE_NAME}",
    f"🔍 Target Schema: {SCHEMA_NAME}",
    f"🌐 Server Endpoint: {SERVER_NAME}",
    f"🔗 Connection strings prepared: {len(CONNECTION_STRINGS)}",
    sep="\n",
)

# Function to test connection
def test_connection(conn_string, description):
    """Try one ODBC connection string and report whether it works.

    Opens a connection, runs ``SELECT @@VERSION`` and ``SELECT DB_NAME()`` as
    a smoke test and prints the outcome. The connection is always closed
    again, including when one of the probe queries fails.

    Args:
        conn_string: ODBC connection string passed to ``pyodbc.connect``.
        description: Label printed alongside the result.

    Returns:
        ``(True, conn_string)`` when both probe queries succeed, otherwise
        ``(False, None)``.
    """
    try:
        print(f"\n🔍 Testing: {description}")
        conn = pyodbc.connect(conn_string)
        try:
            cursor = conn.cursor()

            # Test basic connectivity
            cursor.execute("SELECT @@VERSION")
            version = cursor.fetchone()[0]

            # Test database access
            cursor.execute("SELECT DB_NAME()")
            db_name = cursor.fetchone()[0]
        finally:
            # Release the connection even if a probe query raised; an error
            # from close() itself is still reported by the handlers below.
            conn.close()

        print(f"✅ SUCCESS: Connected to {db_name}")
        print(f"   SQL Server: {version[:50]}...")
        return True, conn_string

    except pyodbc.Error as e:
        print(f"❌ FAILED: {str(e)}")
        return False, None
    except Exception as e:
        print(f"❌ ERROR: {str(e)}")
        return False, None

# Announce how many methods will be tried. The result holder is initialised
# here and is reset again at the top of the next cell, before the connection
# methods are actually exercised.
print(f"\n🚀 Testing {len(CONNECTION_STRINGS)} connection methods...")
successful_connection = None

In [None]:
# Test all connection methods
print("🔍 STEP 2: Test Database Connectivity")
print("=" * 60)

# Name of the first method that connected and the connection string it used.
# Both stay None when every attempt fails; the later cells test
# `working_conn_string` to decide between querying the database and skipping
# discovery / falling back to mock data.
successful_connection = None
working_conn_string = None

# Try the candidates in dictionary order and stop at the first success
for method, conn_string in CONNECTION_STRINGS.items():
    success, working_string = test_connection(conn_string, method)
    if success:
        successful_connection = method
        working_conn_string = working_string
        break

if successful_connection:
    print(f"\n🎉 SUCCESSFUL CONNECTION FOUND!")
    print(f"✅ Working method: {successful_connection}")
    print(f"🔗 Connection string: {working_conn_string}")
else:
    print(f"\n❌ ALL CONNECTION ATTEMPTS FAILED")
    print(f"\n🔧 TROUBLESHOOTING STEPS:")
    print(f"1. ✔️ Verify Gaiye-SQL-DB exists in your Fabric workspace")
    print(f"2. ✔️ Check you have read permissions on the database")
    print(f"3. ✔️ Confirm you're in the correct Fabric workspace")
    print(f"4. ✔️ Try refreshing your browser and reconnecting")
    print(f"5. ✔️ Contact workspace admin for database access")

    # Deliberately leave working_conn_string as None. Re-using a connection
    # string that has just failed would only repeat the same error (and its
    # timeout) in every later step, and it would make the "no working
    # connection" branches of the discovery and extraction cells unreachable,
    # so the mock-data demonstration path could never run.
    print(f"\n⚠️ Continuing without a database connection - later steps will use mock data")

In [None]:
# Discover available schemas and tables
print("🔍 STEP 3: Discover Database Schema and Tables")
print("=" * 60)

def discover_database_structure(conn_string):
    """Print the schemas and SalesLT tables reachable through ``conn_string``.

    Lists every schema except ``sys`` and ``INFORMATION_SCHEMA``, then the
    tables/views in the ``SalesLT`` schema together with three sample rows of
    the first one. When ``SalesLT`` has no tables, all base tables in the
    database are listed instead. The connection is closed before returning,
    whether or not a query failed.

    Args:
        conn_string: ODBC connection string passed to ``pyodbc.connect``.

    Returns:
        DataFrame with ``TABLE_NAME`` and ``TABLE_TYPE`` columns for the
        ``SalesLT`` schema, or an empty DataFrame if anything went wrong.
    """
    try:
        conn = pyodbc.connect(conn_string)
        try:
            # Get all schemas
            schema_query = """
            SELECT DISTINCT SCHEMA_NAME 
            FROM INFORMATION_SCHEMA.SCHEMATA 
            WHERE SCHEMA_NAME NOT IN ('sys', 'INFORMATION_SCHEMA')
            ORDER BY SCHEMA_NAME
            """
            schemas_df = pd.read_sql(schema_query, conn)

            print(f"📚 Available Schemas ({len(schemas_df)}):")
            for schema in schemas_df['SCHEMA_NAME']:
                print(f"   • {schema}")

            # Get tables in SalesLT schema specifically
            saleslt_query = """
            SELECT TABLE_NAME, TABLE_TYPE
            FROM INFORMATION_SCHEMA.TABLES 
            WHERE TABLE_SCHEMA = 'SalesLT'
            ORDER BY TABLE_NAME
            """
            saleslt_tables = pd.read_sql(saleslt_query, conn)

            if len(saleslt_tables) > 0:
                print(f"\n📊 SalesLT Schema Tables ({len(saleslt_tables)}):")
                for table_name, table_type in zip(saleslt_tables['TABLE_NAME'],
                                                  saleslt_tables['TABLE_TYPE']):
                    print(f"   • {table_name} ({table_type})")

                # Get sample data from the first SalesLT table
                first_table = saleslt_tables.iloc[0]['TABLE_NAME']
                # Bracket-quote the identifier (doubling any ']') so names
                # containing spaces or reserved words still form valid T-SQL.
                quoted_table = "[" + str(first_table).replace("]", "]]") + "]"
                sample_query = f"SELECT TOP 3 * FROM SalesLT.{quoted_table}"

                try:
                    sample_data = pd.read_sql(sample_query, conn)
                    print(f"\n📄 Sample Data from SalesLT.{first_table}:")
                    print(sample_data.to_string(index=False))
                    print(f"   Columns: {list(sample_data.columns)}")
                    print(f"   Data types: {sample_data.dtypes.to_dict()}")
                except Exception as e:
                    # Sampling is best-effort; the table list is still returned
                    print(f"⚠️ Could not fetch sample data: {e}")
            else:
                print(f"\n❌ No tables found in SalesLT schema")

                # Check all tables to see what's available
                all_tables_query = """
                SELECT TABLE_SCHEMA, TABLE_NAME, TABLE_TYPE
                FROM INFORMATION_SCHEMA.TABLES 
                WHERE TABLE_TYPE = 'BASE TABLE'
                ORDER BY TABLE_SCHEMA, TABLE_NAME
                """
                all_tables = pd.read_sql(all_tables_query, conn)
                print(f"\n📋 All Available Tables ({len(all_tables)}):")
                for table_schema, table_name in zip(all_tables['TABLE_SCHEMA'],
                                                    all_tables['TABLE_NAME']):
                    print(f"   • {table_schema}.{table_name}")

            return saleslt_tables
        finally:
            # Always release the connection, including when a query raised
            conn.close()

    except Exception as e:
        print(f"❌ Error discovering database structure: {e}")
        return pd.DataFrame()

# Run discovery
if working_conn_string:
    discovered_tables = discover_database_structure(working_conn_string)
else:
    print("⚠️ Skipping discovery - no working connection")
    # Bind the name on the skip path too, using the same "nothing found"
    # value the discovery function returns on failure, so later status checks
    # see an empty result instead of a missing variable.
    discovered_tables = pd.DataFrame()

In [None]:
# Extract sample data for retail pipeline
print("📊 STEP 4: Extract Sample Data for Retail Pipeline")
print("=" * 60)

def extract_saleslt_data(conn_string):
    """Extract the first 100 rows of each core SalesLT table.

    Every successfully read table gets three metadata columns
    (``_extracted_at``, ``_source_table``, ``_extraction_method``). A table
    that cannot be read is represented by a one-row placeholder DataFrame
    that carries the error text in an ``_error`` column, so callers can tell
    failures apart from data. The connection is closed once the loop ends,
    even if the loop is interrupted by an unexpected error.

    Args:
        conn_string: ODBC connection string passed to ``pyodbc.connect``.

    Returns:
        Dict mapping a short alias (``customers``, ``products``, ...) to a
        DataFrame, or an empty dict if the connection itself could not be
        established.
    """

    # Define the core tables we need for retail analytics
    core_tables = {
        'customers': 'SalesLT.Customer',
        'products': 'SalesLT.Product', 
        'orders': 'SalesLT.SalesOrderHeader',
        'order_details': 'SalesLT.SalesOrderDetail',
        'addresses': 'SalesLT.Address',
        'product_categories': 'SalesLT.ProductCategory'
    }

    extracted_data = {}

    try:
        conn = pyodbc.connect(conn_string)
        try:
            for table_alias, table_name in core_tables.items():
                try:
                    print(f"📥 Extracting {table_alias} from {table_name}")

                    # Use SELECT TOP for initial testing
                    query = f"SELECT TOP 100 * FROM {table_name}"
                    df = pd.read_sql(query, conn)

                    # Add extraction metadata
                    df['_extracted_at'] = datetime.now()
                    df['_source_table'] = table_name
                    df['_extraction_method'] = 'direct_sql'

                    extracted_data[table_alias] = df

                    print(f"   ✅ Success: {len(df)} rows, {len(df.columns)} columns")

                    # Show the first few column names
                    if len(df) > 0:
                        print(f"   📋 Sample columns: {list(df.columns[:5])}")
                        if len(df.columns) > 5:
                            print(f"      ... and {len(df.columns) - 5} more columns")

                except Exception as e:
                    print(f"   ❌ Failed to extract {table_alias}: {e}")
                    # Placeholder frame carrying only metadata and the error
                    extracted_data[table_alias] = pd.DataFrame({
                        '_extracted_at': [datetime.now()],
                        '_source_table': [table_name],
                        '_extraction_method': ['failed'],
                        '_error': [str(e)]
                    })
        finally:
            # Release the connection no matter how the loop ended
            conn.close()

        # Summary of extraction
        print(f"\n📈 EXTRACTION SUMMARY:")
        total_rows = 0
        successful_tables = 0

        for table_alias, df in extracted_data.items():
            if '_error' in df.columns:
                # The placeholder's single row is error metadata, not data,
                # so a failed table is reported as having extracted 0 rows.
                status = "❌"
                row_count = 0
            else:
                status = "✅"
                row_count = len(df)
                total_rows += row_count
                successful_tables += 1

            print(f"   {status} {table_alias}: {row_count} rows")

        print(f"\n🎯 Successfully extracted {successful_tables}/{len(core_tables)} tables")
        print(f"📊 Total rows extracted: {total_rows:,}")

        return extracted_data

    except Exception as e:
        print(f"❌ Fatal error during extraction: {e}")
        return {}

# Run data extraction: read from the database when a connection string is
# available, otherwise build a small in-memory stand-in data set.
if not working_conn_string:
    print("⚠️ Skipping data extraction - no working connection")

    # Create mock data for pipeline demonstration
    print("🎭 Creating mock data for pipeline demonstration...")
    retail_data = {
        # Five fictional customers carrying the same metadata columns that a
        # real extraction adds
        'customers': pd.DataFrame({
            'CustomerID': range(1, 6),
            'FirstName': ['John', 'Jane', 'Bob', 'Alice', 'Charlie'],
            'LastName': ['Doe', 'Smith', 'Johnson', 'Williams', 'Brown'],
            'EmailAddress': [
                'john@email.com',
                'jane@email.com',
                'bob@email.com',
                'alice@email.com',
                'charlie@email.com',
            ],
            '_extracted_at': [datetime.now()] * 5,
            '_source_table': ['mock_data'] * 5,
            '_extraction_method': ['mock'] * 5,
        }),
        # Five fictional products with list prices
        'products': pd.DataFrame({
            'ProductID': range(1, 6),
            'Name': ['Product A', 'Product B', 'Product C', 'Product D', 'Product E'],
            'ListPrice': [29.99, 49.99, 79.99, 99.99, 129.99],
            '_extracted_at': [datetime.now()] * 5,
            '_source_table': ['mock_data'] * 5,
            '_extraction_method': ['mock'] * 5,
        }),
    }
    print("✅ Mock data created for pipeline testing")
else:
    print("🚀 Starting data extraction...")
    retail_data = extract_saleslt_data(working_conn_string)

In [None]:
# Initialize Fabric Retail Pipeline
print("🏭 STEP 5: Initialize Microsoft Fabric Retail Pipeline")
print("=" * 60)

# Create a simplified version of the pipeline for SQL data
class SQLFabricRetailPipeline:
    """
    Simplified retail pipeline for SQL Server data source
    Implements Bronze -> Silver -> Gold medallion architecture

    Each layer is held in memory as a dict mapping table name to DataFrame:
    ``bronze_data`` (ingested copies plus ingestion metadata),
    ``silver_data`` (cleaned copies) and ``gold_data`` (summary tables).
    """

    def __init__(self):
        """Create empty layers and record when and from where the run started."""
        self.bronze_data = {}
        self.silver_data = {}
        self.gold_data = {}
        self.pipeline_metadata = {
            'started_at': datetime.now(),
            'source_type': 'sql_server',
            'database_name': DATABASE_NAME,
            'schema_name': SCHEMA_NAME
        }

    def ingest_bronze_from_sql(self, sql_data_dict):
        """Load SQL extracted data into Bronze layer

        Tables that are empty or that carry an ``_error`` column (the marker
        for a failed extraction) are skipped. Every accepted table is copied
        and tagged with an ingestion timestamp, a 1-based record id and a
        processing status.

        Args:
            sql_data_dict: Dict mapping table name to DataFrame.

        Returns:
            True if the Bronze layer holds at least one table afterwards.
        """
        print("📥 Loading data into Bronze layer...")

        ingestion_time = datetime.now().isoformat()

        for table_name, df in sql_data_dict.items():
            if len(df) > 0 and '_error' not in df.columns:
                # Add bronze layer metadata
                bronze_df = df.copy()
                bronze_df['_bronze_ingestion_timestamp'] = ingestion_time
                bronze_df['_bronze_record_id'] = range(1, len(bronze_df) + 1)
                bronze_df['_bronze_processing_status'] = 'ingested'

                self.bronze_data[table_name] = bronze_df
                print(f"   ✅ {table_name}: {len(bronze_df)} rows ingested")
            else:
                print(f"   ⚠️ {table_name}: Skipped due to errors or empty data")

        print(f"\n🎯 Bronze layer complete: {len(self.bronze_data)} tables loaded")
        return len(self.bronze_data) > 0

    @staticmethod
    def _infer_column_type(series):
        """Return ``series`` as numeric or datetime if it converts without loss.

        A conversion is adopted only when every non-null value converts, so a
        column that is only partly numeric or partly date-like keeps its
        original values instead of having the remainder replaced by NaN/NaT.
        Numeric is tried first; datetime is tried only if numeric was not
        adopted, so numbers are never reinterpreted as epoch timestamps.
        """
        non_null_count = series.notna().sum()
        if non_null_count == 0:
            return series

        for convert in (pd.to_numeric, pd.to_datetime):
            try:
                converted = convert(series, errors='coerce')
            except Exception:
                # Type inference is best-effort: keep the original values
                continue
            if converted.notna().sum() == non_null_count:
                return converted

        return series

    def transform_to_silver_basic(self):
        """Basic Silver layer transformations

        For every Bronze table: drop extraction metadata columns (names that
        start with ``_`` but not ``_bronze``), add Silver metadata, and try to
        give object-typed data columns a numeric or datetime dtype.
        """
        print("🔧 Transforming to Silver layer...")

        for table_name, df in self.bronze_data.items():
            print(f"   🔄 Processing {table_name}")

            silver_df = df.copy()

            # Basic data cleaning
            # Remove columns that are just metadata from extraction
            metadata_cols = [col for col in silver_df.columns if col.startswith('_') and not col.startswith('_bronze')]
            silver_df = silver_df.drop(columns=metadata_cols, errors='ignore')

            # Add silver layer metadata
            silver_df['_silver_processed_timestamp'] = datetime.now().isoformat()
            silver_df['_silver_data_quality_score'] = 100.0  # Placeholder

            # Basic data type inference; metadata columns are left untouched
            for col in silver_df.columns:
                if col.startswith('_'):
                    continue
                if pd.api.types.is_object_dtype(silver_df[col]):
                    silver_df[col] = self._infer_column_type(silver_df[col])

            self.silver_data[table_name] = silver_df
            print(f"      ✅ Cleaned {len(silver_df)} rows, {len(silver_df.columns)} columns")

        print(f"\n🎯 Silver layer complete: {len(self.silver_data)} tables processed")

    def create_gold_analytics_basic(self):
        """Basic Gold layer analytics

        Builds one-row summary tables: ``customer_summary`` and
        ``product_summary`` (when the corresponding Silver tables exist) and
        ``pipeline_summary`` (always).
        """
        print("💎 Creating Gold layer analytics...")

        # Customer analytics (if customers table exists)
        if 'customers' in self.silver_data:
            customers_df = self.silver_data['customers']
            customer_analytics = pd.DataFrame({
                'total_customers': [len(customers_df)],
                'analysis_timestamp': [datetime.now().isoformat()],
                'data_source': [f"{DATABASE_NAME}.{SCHEMA_NAME}"]
            })
            self.gold_data['customer_summary'] = customer_analytics
            print(f"   ✅ Customer analytics: {len(customers_df)} customers analyzed")

        # Product analytics (if products table exists) 
        if 'products' in self.silver_data:
            products_df = self.silver_data['products']
            product_analytics = pd.DataFrame({
                'total_products': [len(products_df)],
                'analysis_timestamp': [datetime.now().isoformat()],
                'data_source': [f"{DATABASE_NAME}.{SCHEMA_NAME}"]
            })

            # Basic price analysis on the first column whose name contains
            # "price" (case-insensitive)
            price_cols = [col for col in products_df.columns if 'price' in col.lower()]
            if price_cols:
                price_col = price_cols[0]
                try:
                    price_data = pd.to_numeric(products_df[price_col], errors='coerce')
                    if not price_data.isna().all():
                        product_analytics['avg_price'] = [price_data.mean()]
                        product_analytics['min_price'] = [price_data.min()]
                        product_analytics['max_price'] = [price_data.max()]
                except Exception:
                    # Price statistics are optional; the summary is still
                    # produced without them
                    pass

            self.gold_data['product_summary'] = product_analytics
            print(f"   ✅ Product analytics: {len(products_df)} products analyzed")

        # Overall pipeline summary. 'gold_tables' counts the gold tables that
        # exist at this point, i.e. before this summary itself is stored.
        pipeline_summary = pd.DataFrame({
            'pipeline_run_id': [f"fabric_sql_{datetime.now().strftime('%Y%m%d_%H%M%S')}"],
            'source_database': [DATABASE_NAME],
            'source_schema': [SCHEMA_NAME],
            'bronze_tables': [len(self.bronze_data)],
            'silver_tables': [len(self.silver_data)],
            'gold_tables': [len(self.gold_data)],
            'total_rows_processed': [sum(len(df) for df in self.silver_data.values())],
            'pipeline_completion_time': [datetime.now().isoformat()],
            'status': ['completed']
        })

        self.gold_data['pipeline_summary'] = pipeline_summary
        print(f"   ✅ Pipeline summary created")

        print(f"\n🎯 Gold layer complete: {len(self.gold_data)} analytics tables created")

# Build the pipeline object, then run Bronze -> Silver -> Gold if there is
# any input data to feed it.
print("🚀 Initializing SQL Fabric Retail Pipeline...")
sql_pipeline = SQLFabricRetailPipeline()

if not retail_data:
    print("⚠️ No data available for pipeline execution")
else:
    print(f"📊 Input data: {len(retail_data)} tables available")

    # Bronze decides whether the remaining layers are worth running
    bronze_success = sql_pipeline.ingest_bronze_from_sql(retail_data)

    if not bronze_success:
        print("❌ Pipeline failed at Bronze layer")
    else:
        # Silver, then Gold
        sql_pipeline.transform_to_silver_basic()
        sql_pipeline.create_gold_analytics_basic()

        # Per-layer table counts
        print(
            "\n🎉 PIPELINE EXECUTION COMPLETED!",
            f"   📊 Bronze: {len(sql_pipeline.bronze_data)} tables",
            f"   🔧 Silver: {len(sql_pipeline.silver_data)} tables",
            f"   💎 Gold: {len(sql_pipeline.gold_data)} tables",
            sep="\n",
        )

In [None]:
# Validate pipeline results and display insights
print("🔍 STEP 6: Validate Pipeline Results and Display Insights")
print("=" * 60)

def display_pipeline_insights(pipeline):
    """Print a text dashboard summarising a finished pipeline run.

    Reads ``gold_data``, ``silver_data`` and ``bronze_data`` from ``pipeline``
    and prints the execution summary, customer and product insights (with up
    to three sample rows each) and per-layer table sizes. Prints a notice and
    returns early when there is no gold data. Returns ``None``.
    """

    def show_sample(frame, heading):
        # First five non-metadata columns, first three rows
        print(heading)
        visible = [name for name in frame.columns if not name.startswith('_')][:5]
        print(frame[visible].head(3).to_string(index=False))

    has_gold = hasattr(pipeline, 'gold_data') and len(pipeline.gold_data) != 0
    if not has_gold:
        print("⚠️ No gold layer data available for insights")
        return

    print("📊 PIPELINE INSIGHTS DASHBOARD")
    print("=" * 40)

    # Execution summary: every non-metadata field of the one-row summary
    if 'pipeline_summary' in pipeline.gold_data:
        run_summary = pipeline.gold_data['pipeline_summary']
        print("\n🎯 EXECUTION SUMMARY:")
        for field in run_summary.columns:
            if field.startswith('_'):
                continue
            print(f"   {field}: {run_summary[field].iloc[0]}")

    # Customers
    if 'customer_summary' in pipeline.gold_data:
        customer_stats = pipeline.gold_data['customer_summary']
        print(f"\n👥 CUSTOMER INSIGHTS:")
        print(f"   Total Customers: {customer_stats['total_customers'].iloc[0]:,}")

        if 'customers' in pipeline.silver_data:
            show_sample(pipeline.silver_data['customers'], f"   Sample Customer Data:")

    # Products
    if 'product_summary' in pipeline.gold_data:
        product_stats = pipeline.gold_data['product_summary']
        print(f"\n🛍️ PRODUCT INSIGHTS:")
        print(f"   Total Products: {product_stats['total_products'].iloc[0]:,}")

        # Price figures exist only when the gold layer could compute them
        if 'avg_price' in product_stats.columns:
            print(f"   Average Price: ${product_stats['avg_price'].iloc[0]:.2f}")
            print(f"   Price Range: ${product_stats['min_price'].iloc[0]:.2f} - ${product_stats['max_price'].iloc[0]:.2f}")

        if 'products' in pipeline.silver_data:
            show_sample(pipeline.silver_data['products'], f"   Sample Product Data:")

    # Size of every layer, table by table
    print(f"\n📈 DATA QUALITY ASSESSMENT:")

    layers = {
        'Bronze': pipeline.bronze_data,
        'Silver': pipeline.silver_data,
        'Gold': pipeline.gold_data,
    }
    for layer_label, tables in layers.items():
        print(f"\n   {layer_label} Layer:")
        if not tables:
            print(f"      No data available")
            continue

        row_total = sum(map(len, tables.values()))
        col_total = sum(len(frame.columns) for frame in tables.values())
        print(f"      Tables: {len(tables)}")
        print(f"      Total Rows: {row_total:,}")
        print(f"      Total Columns: {col_total}")

        for table_label, frame in tables.items():
            print(f"         • {table_label}: {len(frame)} rows × {len(frame.columns)} cols")

# Show the dashboard when a pipeline object with a gold layer exists
if 'sql_pipeline' not in locals() or not hasattr(sql_pipeline, 'gold_data'):
    print("⚠️ Pipeline not executed successfully - no insights available")
else:
    display_pipeline_insights(sql_pipeline)

# One status line per stage, followed by hints when that stage did not work
print("\n🔧 TROUBLESHOOTING RECOMMENDATIONS:", "=" * 45, sep="\n")

# Stage 1: database connection
if not successful_connection:
    print(
        "❌ Database Connection: Failed",
        "   💡 Try: Check Fabric workspace permissions",
        "   💡 Try: Verify database name spelling",
        "   💡 Try: Contact workspace administrator",
        sep="\n",
    )
else:
    print("✅ Database Connection: Working")

# Stage 2: schema discovery (the name may be unset if discovery never ran)
if 'discovered_tables' in locals() and len(discovered_tables) > 0:
    print("✅ Schema Access: SalesLT schema accessible")
else:
    print(
        "❌ Schema Access: Cannot access SalesLT schema",
        "   💡 Try: Check read permissions on SalesLT schema",
        "   💡 Try: Verify schema exists in database",
        sep="\n",
    )

# Stage 3: data reached the Bronze layer
if 'sql_pipeline' in locals() and len(sql_pipeline.bronze_data) > 0:
    print("✅ Data Extraction: Successful")
else:
    print(
        "❌ Data Extraction: Failed or incomplete",
        "   💡 Try: Check table permissions",
        "   💡 Try: Verify data exists in tables",
        sep="\n",
    )

print(
    "\n📋 NEXT STEPS:",
    "1. 🔧 Fix any connection issues identified above",
    "2. 📊 Run the Export_SalesLT_to_Bronze.ipynb notebook",
    "3. 🏗️ Set up Bronze-Silver-Gold layer transformations",
    "4. 📈 Build retail analytics dashboards",
    "5. 🚀 Deploy to production Fabric environment",
    sep="\n",
)

In [None]:
# Export pipeline data and create documentation
print("💾 STEP 7: Export Data and Create Documentation")
print("=" * 60)

def export_pipeline_data(pipeline, output_prefix="fabric_sql_pipeline", base_dir="Files"):
    """Export all pipeline data to multiple formats

    For every non-empty table in the bronze, silver and gold layers this
    writes a Parquet file, a CSV file and a JSON metadata file under
    ``<base_dir>/<layer>/``, and finally a JSON export summary directly under
    ``<base_dir>``. Missing output directories are created first; without
    that, every write fails on a fresh working directory.

    Args:
        pipeline: Object exposing ``bronze_data``, ``silver_data`` and
            ``gold_data`` dicts of DataFrames.
        output_prefix: File name prefix shared by all exported files.
        base_dir: Non-empty root directory for the export. Defaults to the
            relative ``"Files"`` directory.

    Returns:
        The export summary dict, or ``None`` when ``pipeline`` has no
        ``bronze_data`` attribute.

    Raises:
        OSError: If ``base_dir`` cannot be created or the summary file cannot
            be written. Per-table failures are printed and skipped instead.
    """

    if not hasattr(pipeline, 'bronze_data'):
        print("⚠️ No pipeline data to export")
        return None

    export_summary = {
        'export_timestamp': datetime.now().isoformat(),
        'source_database': DATABASE_NAME,
        'source_schema': SCHEMA_NAME,
        'exported_layers': [],
        'file_formats': ['parquet', 'csv', 'json']
    }

    # Export each layer
    for layer_name, layer_data in [('bronze', pipeline.bronze_data), 
                                   ('silver', pipeline.silver_data), 
                                   ('gold', pipeline.gold_data)]:

        if not layer_data:
            continue

        print(f"\n📁 Exporting {layer_name.title()} Layer:")
        layer_info = {
            'layer_name': layer_name,
            'tables': [],
            'total_rows': 0,
            'total_files': 0
        }
        layer_dir = f"{base_dir}/{layer_name}"

        for table_name, df in layer_data.items():
            if len(df) == 0:
                continue

            file_prefix = f"{layer_dir}/{output_prefix}_{table_name}"

            try:
                # Inside the try so a directory problem is reported per table
                # like any other export failure
                os.makedirs(layer_dir, exist_ok=True)

                # Export to Parquet (recommended for Fabric)
                parquet_path = f"{file_prefix}.parquet"
                df.to_parquet(parquet_path, index=False)
                print(f"   ✅ {table_name}.parquet: {len(df)} rows")

                # Export to CSV (for compatibility)
                csv_path = f"{file_prefix}.csv"
                df.to_csv(csv_path, index=False)
                print(f"   ✅ {table_name}.csv: {len(df)} rows")

                # Export metadata as JSON
                metadata = {
                    'table_name': table_name,
                    'layer': layer_name,
                    'row_count': len(df),
                    'column_count': len(df.columns),
                    'columns': list(df.columns),
                    'data_types': df.dtypes.astype(str).to_dict(),
                    'export_timestamp': datetime.now().isoformat(),
                    # NOTE: this is the DataFrame's in-memory footprint, not
                    # the size of the files written above
                    'file_size_mb': round(df.memory_usage(deep=True).sum() / 1024 / 1024, 2)
                }

                json_path = f"{file_prefix}_metadata.json"
                with open(json_path, 'w', encoding='utf-8') as f:
                    json.dump(metadata, f, indent=2)

                # Record the table only after all three files were written
                layer_info['tables'].append({
                    'name': table_name,
                    'rows': len(df),
                    'columns': len(df.columns),
                    'files': [parquet_path, csv_path, json_path]
                })
                layer_info['total_rows'] += len(df)
                layer_info['total_files'] += 3

            except Exception as e:
                print(f"   ❌ Failed to export {table_name}: {e}")

        export_summary['exported_layers'].append(layer_info)
        print(f"   📊 {layer_name.title()} summary: {layer_info['total_files']} files, {layer_info['total_rows']} rows")

    # Create overall export summary; make sure its directory exists even when
    # no layer was exported
    os.makedirs(base_dir, exist_ok=True)
    summary_path = f"{base_dir}/{output_prefix}_export_summary.json"
    with open(summary_path, 'w', encoding='utf-8') as f:
        json.dump(export_summary, f, indent=2)

    print(f"\n📄 Export summary saved: {summary_path}")
    return export_summary

# Export only when a pipeline object exists and its Bronze layer holds data
if 'sql_pipeline' not in locals() or not hasattr(sql_pipeline, 'bronze_data'):
    print("⚠️ Pipeline not executed - skipping export")
elif len(sql_pipeline.bronze_data) == 0:
    print("⚠️ No data available for export")
else:
    print("🚀 Starting data export...")
    export_result = export_pipeline_data(sql_pipeline)

    if export_result:
        print(
            "\n🎉 EXPORT COMPLETED SUCCESSFULLY!",
            "📁 Files saved to: Files/ directory",
            f"📊 Total layers exported: {len(export_result['exported_layers'])}",
            sep="\n",
        )
        total_files = sum(layer['total_files'] for layer in export_result['exported_layers'])
        print(f"📄 Total files created: {total_files}")

# Final status banner
print(
    "\n" + "=" * 70,
    "🎯 FABRIC SQL CONNECTION TROUBLESHOOTING COMPLETE",
    "=" * 70,
    sep="\n",
)

# One line per stage: connection, schema discovery, pipeline execution
print("\n📋 EXECUTION STATUS:")
if not successful_connection:
    print("❌ Database Connection: FAILED")
else:
    print("✅ Database Connection: SUCCESS")

if 'discovered_tables' in locals() and len(discovered_tables) > 0:
    print("✅ Schema Discovery: SUCCESS")
else:
    print("❌ Schema Discovery: FAILED")

if 'sql_pipeline' in locals() and len(sql_pipeline.bronze_data) > 0:
    print("✅ Pipeline Execution: SUCCESS")
else:
    print("❌ Pipeline Execution: FAILED")

# Static guidance for both outcomes, followed by reference links
print(
    "\n🔧 IF CONNECTIONS FAILED:",
    "1. Check your Fabric workspace has access to Gaiye-SQL-DB",
    "2. Verify you have read permissions on SalesLT schema",
    "3. Confirm database name spelling: 'Gaiye-SQL-DB'",
    "4. Try refreshing browser and reconnecting to workspace",
    "5. Contact workspace administrator for access verification",
    sep="\n",
)

print(
    "\n🚀 IF CONNECTIONS WORKED:",
    "1. Use the Export_SalesLT_to_Bronze.ipynb notebook",
    "2. Modify connection string based on working method found above",
    "3. Proceed with Bronze-Silver-Gold layer development",
    "4. Deploy retail analytics solution to Fabric",
    sep="\n",
)

print(
    "\n📚 ADDITIONAL RESOURCES:",
    "• Microsoft Fabric Documentation: https://docs.microsoft.com/fabric/",
    "• SQL Database Connectivity: https://docs.microsoft.com/fabric/data-warehouse/",
    "• Retail Industry Solutions: https://docs.microsoft.com/industry/retail/",
    sep="\n",
)

print(f"\n✨ Troubleshooting completed at: {datetime.now()}")