# SalesLT to Bronze Lakehouse Copy

**Objective**: Copy the SalesLT tables exposed as shortcuts in `Gaiye_Test_Lakehouse` into the bronze layer of `RDS_Fabric_Foundry_workspace_Gaiye_Retail_Solution_Test_IDM_LH_bronze`, written as one Parquet folder per table under `Files/bronze/saleslt/`.

**Setup Required**:
1. Run this notebook in the bronze lakehouse: `RDS_Fabric_Foundry_workspace_Gaiye_Retail_Solution_Test_IDM_LH_bronze`
2. Attach `Gaiye_Test_Lakehouse` as an additional lakehouse
3. Execute cells sequentially

## Step 1: Import Libraries and Setup

In [None]:
# Import required libraries
import pandas as pd
from datetime import datetime
from pyspark.sql.functions import lit

# Startup banner: confirms the imports above succeeded, records the start
# time, and states the target folder used by the later cells.
startup_lines = (
    "✅ Libraries imported",
    f"📅 Started: {datetime.now()}",
    "🎯 Target: Files/bronze/saleslt/ in current lakehouse",
)
print("\n".join(startup_lines))

## Step 2: Environment Check

In [None]:
# Environment check: confirm the Spark session responds and report which of
# the expected SalesLT tables are visible to this notebook.
print("🔍 ENVIRONMENT CHECK")
print("=" * 50)

# Check Spark
print(f"✅ Spark version: {spark.version}")

# Check available tables
# NOTE(review): SHOW TABLES without an IN clause lists the current database
# only -- confirm that the shortcut tables of the attached lakehouse are
# visible here under their unqualified names.
tables_df = spark.sql("SHOW TABLES").toPandas()
print(f"✅ Available tables: {len(tables_df)}")

# Lower-cased names are collected once into a set, so the comparison below is
# case-insensitive and does not rebuild the name array for every table.
visible_tables = set(tables_df['tableName'].str.lower())

# Look for SalesLT tables (shortcuts)
expected_tables = ['address', 'customer', 'customeraddress', 'product',
                   'productcategory', 'productdescription', 'productmodel',
                   'productmodelproductdescription', 'salesorderdetail', 'salesorderheader']

found_tables = []
for table in expected_tables:
    if table in visible_tables:
        found_tables.append(table)
        print(f"   ✅ Found: {table}")
    else:
        print(f"   ❌ Missing: {table}")

print(f"\n📊 Found {len(found_tables)} of {len(expected_tables)} expected tables")

if not found_tables:
    print("⚠️ No SalesLT tables found!")
    print("💡 Ensure Gaiye_Test_Lakehouse is attached as additional lakehouse")
else:
    print(f"🎉 Ready to copy {len(found_tables)} tables!")

## Step 3: Test Write Access

In [None]:
# Test write access to the bronze layer by writing a one-row Parquet dataset,
# reading it back, and then removing it again.
print("🧪 TESTING WRITE ACCESS")
print("=" * 50)

test_path = "Files/bronze/saleslt/_test"
# Tracks whether the write was started, so cleanup is only attempted when
# there may be something on disk to remove.
write_attempted = False

try:
    # Create test data
    test_data = [("test", datetime.now().isoformat())]
    test_df = spark.createDataFrame(test_data, ["status", "timestamp"])

    # Test write to bronze location
    write_attempted = True
    test_df.write.mode("overwrite").parquet(test_path)

    # Verify read
    verify_df = spark.read.parquet(test_path)
    count = verify_df.count()

except Exception as e:
    print(f"❌ Write test failed: {str(e)}")
    print(f"💡 Ensure you're in the correct bronze lakehouse")

else:
    print(f"✅ Write access confirmed")
    print(f"✅ Test file created and read ({count} records)")
    print(f"🎯 Ready to write to Files/bronze/saleslt/")

finally:
    # Clean up separately from the probe itself: a cleanup problem must not be
    # reported as a write-access failure, and a failed read-back must not
    # leave the test directory behind.
    if write_attempted:
        try:
            # NOTE(review): `dbutils` is assumed to be provided by the notebook
            # runtime -- confirm it exists in this environment (some runtimes
            # expose a differently named file-system utility instead).
            dbutils.fs.rm(test_path, True)
        except Exception as cleanup_error:
            print(f"⚠️ Could not remove test directory {test_path}: {cleanup_error}")

## Step 4: Copy Tables to Bronze

In [None]:
# Copy all available SalesLT tables to the bronze layer.
# Each table is read in full, tagged with load-metadata columns, and written
# as Parquet to Files/bronze/saleslt/<table>, replacing any previous copy.
# A failure on one table is recorded and reported; the remaining tables are
# still attempted.
print("🚀 COPYING TABLES TO BRONZE LAYER")
print("=" * 60)

# Get available tables again (this cell does not rely on earlier cell state)
tables_df = spark.sql("SHOW TABLES").toPandas()
expected_tables = ['address', 'customer', 'customeraddress', 'product',
                   'productcategory', 'productdescription', 'productmodel',
                   'productmodelproductdescription', 'salesorderdetail', 'salesorderheader']

# Find available tables; the lower-cased name set is built once instead of
# once per expected table.
visible_tables = set(tables_df['tableName'].str.lower())
available_tables = [table for table in expected_tables if table in visible_tables]

print(f"📋 Copying {len(available_tables)} tables")
print(f"🎯 Target: Files/bronze/saleslt/")
print()

results = []
total_rows = 0

for i, table_name in enumerate(available_tables, 1):
    print(f"[{i}/{len(available_tables)}] Copying {table_name}...")

    try:
        # Read source table. table_name always comes from the hard-coded
        # expected_tables list above, never from user input.
        source_df = spark.sql(f"SELECT * FROM {table_name}")
        row_count = source_df.count()

        # Capture the load time once per table so that _bronze_load_date and
        # _bronze_load_timestamp always describe the same instant (two
        # separate now() calls can straddle midnight).
        load_time = datetime.now()

        # Add metadata columns
        bronze_df = (
            source_df
            .withColumn("_bronze_load_date", lit(load_time.strftime("%Y-%m-%d")))
            .withColumn("_bronze_load_timestamp", lit(load_time.isoformat()))
            .withColumn("_source_system", lit("SalesLT"))
            .withColumn("_source_table", lit(table_name))
            .withColumn("_load_method", lit("shortcut_copy"))
        )

        # Write to bronze
        # NOTE(review): "overwriteSchema" looks like a Delta writer option;
        # presumably it has no effect on a Parquet write -- confirm.
        bronze_path = f"Files/bronze/saleslt/{table_name}"
        bronze_df.write.mode("overwrite").option("overwriteSchema", "true").parquet(bronze_path)

        # Only count rows once the write has completed without raising.
        total_rows += row_count
        results.append({"table": table_name, "rows": row_count, "status": "success"})

        print(f"   ✅ {row_count:,} rows copied")

    except Exception as e:
        # Keep the message short for the summary; only the first 80
        # characters of the error text are retained.
        error_msg = str(e)[:80]
        results.append({"table": table_name, "rows": 0, "status": "failed", "error": error_msg})
        print(f"   ❌ Failed: {error_msg}...")

    print()

# Summary
successful = [r for r in results if r["status"] == "success"]
failed = [r for r in results if r["status"] == "failed"]

print("🎉 COPY SUMMARY")
print("=" * 60)
print(f"✅ Successful: {len(successful)} tables")
print(f"❌ Failed: {len(failed)} tables")
print(f"📊 Total rows copied: {total_rows:,}")

if successful:
    print(f"\n📁 Bronze layer structure:")
    print(f"Files/bronze/saleslt/")
    for result in successful:
        print(f"├── {result['table']}/ ({result['rows']:,} rows)")

if failed:
    print(f"\n⚠️ Failed copies:")
    for result in failed:
        print(f"❌ {result['table']}: {result.get('error', 'Unknown error')}")

print(f"\n🎯 Data location: Files/bronze/saleslt/ in current lakehouse")
print("=" * 60)

## Step 5: Validation

In [None]:
# Validate the bronze layer data.
# Walks Files/ -> Files/bronze/ -> Files/bronze/saleslt/, then reads every
# table directory back as Parquet and reports its row count and the number of
# underscore-prefixed (metadata) columns.
# NOTE(review): `dbutils` and the callable `item.isDir()` are assumed to be
# provided by the notebook runtime -- confirm both for this environment.
print("🔍 BRONZE LAYER VALIDATION")
print("=" * 60)

try:
    # Check Files directory
    files_content = dbutils.fs.ls("Files/")
    print(f"📁 Files directory contains {len(files_content)} items:")
    for item in files_content:
        print(f"   📂 {item.name}")

    # Check bronze directory (listing names may carry a trailing '/')
    if any(item.name.rstrip('/') == 'bronze' for item in files_content):
        print(f"\n🎯 Bronze directory found! Checking contents...")

        bronze_content = dbutils.fs.ls("Files/bronze/")
        print(f"📁 Bronze directory contains {len(bronze_content)} items:")
        for item in bronze_content:
            print(f"   📂 {item.name}")

        # Check saleslt directory
        if any(item.name.rstrip('/') == 'saleslt' for item in bronze_content):
            print(f"\n🎉 SalesLT directory found! Checking tables...")

            saleslt_content = dbutils.fs.ls("Files/bronze/saleslt/")

            # Filter once: only directories count as tables, and names
            # starting with '_' (such as the write-test folder) are skipped.
            # The same list drives the header, the loop and the summary so
            # the reported counts always agree.
            table_dirs = [
                item for item in saleslt_content
                if item.isDir() and not item.name.startswith('_')
            ]
            print(f"📊 Found {len(table_dirs)} table directories:")

            total_validation_rows = 0
            unreadable_tables = []

            for item in table_dirs:
                table_name = item.name.rstrip('/')
                try:
                    # Read and count rows
                    df = spark.read.parquet(f"Files/bronze/saleslt/{table_name}")
                    row_count = df.count()
                    total_validation_rows += row_count

                    # Check for metadata columns
                    metadata_cols = [col for col in df.columns if col.startswith('_')]

                    print(f"   ✅ {table_name}: {row_count:,} rows, {len(metadata_cols)} metadata columns")

                except Exception as e:
                    unreadable_tables.append(table_name)
                    print(f"   ⚠️ {table_name}: Could not read ({str(e)[:30]}...)")

            print(f"\n📊 VALIDATION SUMMARY:")
            print(f"   📋 Table directories: {len(table_dirs)}")
            print(f"   📊 Total validated rows: {total_validation_rows:,}")
            print(f"   🎯 Location: Files/bronze/saleslt/")

            # Only claim success when there was something to validate and
            # every table could be read back.
            if not table_dirs:
                print(f"\n⚠️ No table directories found in Files/bronze/saleslt/")
            elif unreadable_tables:
                print(f"\n⚠️ {len(unreadable_tables)} table(s) could not be read: {', '.join(unreadable_tables)}")
            else:
                print(f"\n🎉 SUCCESS! Your SalesLT data is now in the bronze layer!")
        else:
            print(f"❌ SalesLT directory not found in bronze")
    else:
        print(f"❌ Bronze directory not found in Files")

except Exception as e:
    print(f"❌ Validation failed: {str(e)}")
    print(f"💡 Check if the copy process completed successfully")

print(f"\n{'=' * 60}")
print(f"🏁 Process completed at {datetime.now()}")
print(f"📍 Bronze data location: Files/bronze/saleslt/")
print(f"🎯 Target lakehouse: RDS_Fabric_Foundry_workspace_Gaiye_Retail_Solution_Test_IDM_LH_bronze")
print(f"{'=' * 60}")