In [4]:
# COMPLETE SETUP & DATA GENERATION - RUN THIS FIRST
# Make sure Lakehouse is attached before running!

print("🚀 REIMAGE-AI SMART PARKING - COMPLETE SETUP")

# =============================================================================
# STEP 1: Verify Lakehouse Connection
# =============================================================================
print("🔧 STEP 1: VERIFYING LAKEHOUSE CONNECTION...")

try:
    # Any failure of this catalog query is reported as "no Lakehouse attached".
    tables = spark.sql("SHOW TABLES").collect()
except Exception:
    # Walk the user through attaching a Lakehouse, then abort the cell.
    for help_line in (
        "❌ No Lakehouse attached! Please manually attach Lakehouse first:",
        "1. Click 'Add' in top-right of notebook",
        "2. Select 'Existing Lakehouse'",
        "3. Choose 'ParkingDataLakehouse'",
        "4. Run this cell again",
    ):
        print(help_line)
    raise
else:
    print(f"✅ Lakehouse connected! Found {len(tables)} tables")

# =============================================================================
# STEP 2: Clean Up Existing Tables (If Any)
# =============================================================================
print("\n🧹 STEP 2: CLEANING UP EXISTING TABLES...")

# Tables removed before the fresh setup; dropping is best-effort per table.
tables_to_clean = [
    "ParkingSensorData",
    "TrafficCameraData",
    "HistoricalTraffic",
    "SensorMaintenanceData",
]

for stale_table in tables_to_clean:
    drop_statement = f"DROP TABLE IF EXISTS {stale_table}"
    try:
        spark.sql(drop_statement)
    except Exception as drop_error:
        # A failed drop is reported but does not stop the remaining drops.
        print(f"ℹ️  Could not drop {stale_table}: {drop_error}")
    else:
        print(f"✅ Dropped table: {stale_table}")

# =============================================================================
# STEP 3: Create Fresh Tables
# =============================================================================
print("\n📋 STEP 3: CREATING FRESH TABLES...")

# One CREATE TABLE statement per Delta table, keyed by table name.
# NOTE(review): SensorMaintenanceData is dropped in Step 2 but has no DDL
# here, so it is not recreated by this cell — confirm that is intended.
tables_sql = {
    "ParkingSensorData": """
        CREATE TABLE ParkingSensorData (
            sensor_id STRING,
            timestamp TIMESTAMP,
            occupancy_status BOOLEAN,
            vehicle_count INT,
            temperature DOUBLE,
            vibration_level DOUBLE,
            location STRING,
            parking_zone STRING,
            transaction_hash STRING
        ) USING DELTA
    """,
    "TrafficCameraData": """
        CREATE TABLE TrafficCameraData (
            camera_id STRING,
            timestamp TIMESTAMP,
            image_url STRING,
            vehicle_count INT,
            traffic_density DOUBLE,
            average_speed INT,
            congestion_level STRING,
            processed BOOLEAN,
            yolo_results STRING
        ) USING DELTA
    """,
    "HistoricalTraffic": """
        CREATE TABLE HistoricalTraffic (
            zone_id STRING,
            date DATE,
            hour INT,
            average_occupancy DOUBLE,
            traffic_volume INT,
            weather_condition STRING,
            event_day BOOLEAN
        ) USING DELTA
    """
}

# Remember which tables failed so the closing message reflects what happened
# instead of always claiming success.
failed_tables = []
for table_name, sql in tables_sql.items():
    try:
        spark.sql(sql)
        print(f"✅ Created: {table_name}")
    except Exception as e:
        failed_tables.append(table_name)
        print(f"❌ Failed to create {table_name}: {e}")

if failed_tables:
    print(f"⚠️  Table creation finished with errors: {', '.join(failed_tables)}")
else:
    print("🎉 Tables created successfully!")

# =============================================================================
# STEP 4: Generate Synthetic Data
# =============================================================================
print("\n🚀 STEP 4: GENERATING SYNTHETIC DATA...")

# Faker is a third-party package and must be installed in the notebook
# environment; it supplies the random timestamps and SHA-256 strings used by
# DataGenerator. The remaining imports are standard library.
from faker import Faker
from datetime import datetime, timedelta
import random
import json

class DataGenerator:
    """Build synthetic parking, traffic and historical records.

    Each ``generate_*`` method returns a list of plain dicts whose keys match
    the columns of the corresponding table created earlier in this notebook.
    All randomness comes from the global ``random`` module and a ``Faker``
    instance, so results differ between runs unless those are seeded.
    """

    LOCATIONS = ('Downtown', 'Shopping Mall', 'Airport', 'Hospital', 'University')
    ZONES = ('ZONE_A', 'ZONE_B', 'ZONE_C', 'ZONE_D', 'ZONE_E')
    WEATHER_TYPES = ('Sunny', 'Rainy', 'Cloudy')

    def __init__(self):
        self.fake = Faker()

    def generate_parking_data(self, num_records=200):
        """Return ``num_records`` parking-sensor readings from the last 30 days.

        Occupancy is drawn with probability 0.7 between 08:00 and 18:59 and
        0.3 otherwise; ``vehicle_count`` is 1 when occupied, else 0.
        """
        print(f"🅿️ Generating {num_records} parking records...")
        records = []

        for _ in range(num_records):
            sensor_id = f"SENSOR_{random.randint(1, 50):03d}"
            timestamp = self.fake.date_time_between(start_date="-30d", end_date="now")

            # Business hours are busier than off hours.
            occupancy_prob = 0.7 if 8 <= timestamp.hour <= 18 else 0.3
            occupied = random.random() < occupancy_prob

            records.append({
                'sensor_id': sensor_id,
                'timestamp': timestamp,
                'occupancy_status': bool(occupied),
                'vehicle_count': 1 if occupied else 0,
                'temperature': float(round(random.uniform(15.0, 35.0), 2)),
                'vibration_level': float(round(random.uniform(0.1, 5.0), 2)),
                'location': str(random.choice(self.LOCATIONS)),
                'parking_zone': str(random.choice(self.ZONES)),
                'transaction_hash': str(self.fake.sha256())
            })

        print(f"✅ Generated {len(records)} parking records")
        return records

    def generate_traffic_data(self, num_records=100):
        """Return ``num_records`` traffic-camera observations from the last 30 days.

        Rush hours (07-09 and 16-18) produce more vehicles, higher density and
        lower speed than other hours. ``yolo_results`` is a JSON string with
        simulated detection output.
        """
        print(f"🚦 Generating {num_records} traffic records...")
        records = []

        for _ in range(num_records):
            camera_id = f"CAM_{random.randint(1, 20):03d}"
            timestamp = self.fake.date_time_between(start_date="-30d", end_date="now")

            hour = timestamp.hour
            if 7 <= hour <= 9 or 16 <= hour <= 18:  # Rush hours
                vehicles = random.randint(20, 50)
                density = round(random.uniform(0.7, 0.95), 2)
                speed = random.randint(20, 40)
            else:  # Normal hours
                vehicles = random.randint(5, 25)
                density = round(random.uniform(0.2, 0.6), 2)
                speed = random.randint(40, 60)

            # Rush-hour density starts at 0.70, so the HIGH threshold is
            # inclusive; otherwise a draw that rounds to exactly 0.70 would be
            # labelled MEDIUM.
            if density >= 0.7:
                congestion = "HIGH"
            elif density > 0.4:
                congestion = "MEDIUM"
            else:
                congestion = "LOW"

            # YOLO simulation data
            yolo_data = {
                'vehicles_detected': vehicles,
                'confidence': round(random.uniform(0.85, 0.98), 2),
                'processing_time_ms': random.randint(100, 300)
            }

            records.append({
                'camera_id': camera_id,
                'timestamp': timestamp,
                'image_url': f"https://trafficcams.com/{camera_id}/{timestamp.strftime('%Y%m%d_%H%M%S')}.jpg",
                'vehicle_count': int(vehicles),
                'traffic_density': float(density),
                'average_speed': int(speed),
                'congestion_level': congestion,
                'processed': True,
                'yolo_results': json.dumps(yolo_data)
            })

        print(f"✅ Generated {len(records)} traffic records")
        return records

    def generate_historical_data(self, days=30):
        """Return hourly history for every zone over the ``days`` days before now.

        Produces ``len(ZONES) * days * 24`` records ordered by zone, date, hour.
        ``event_day`` is decided once per calendar date, so all hours and all
        zones of the same date agree on whether it was a holiday.
        """
        print(f"📅 Generating {days} days of historical data...")
        records = []
        start_date = datetime.now() - timedelta(days=days)
        dates = [start_date + timedelta(days=n) for n in range(days)]
        # 5% chance that a given date is a holiday, rolled once per date.
        holiday_flags = [random.random() < 0.05 for _ in dates]

        for zone in self.ZONES:
            for single_date, is_holiday in zip(dates, holiday_flags):
                is_weekend = single_date.weekday() >= 5
                for hour in range(24):
                    if is_holiday:
                        base_occ = random.uniform(0.7, 0.95)
                    elif is_weekend:
                        base_occ = random.uniform(0.4, 0.8)
                    else:
                        base_occ = random.uniform(0.3, 0.9)

                    hour_mult = 1.0 if 8 <= hour <= 18 else 0.4
                    weather = random.choice(self.WEATHER_TYPES)
                    weather_impact = 0.8 if weather == 'Rainy' else 1.0

                    occupancy = round(base_occ * hour_mult * weather_impact, 3)
                    volume = int(occupancy * random.randint(500, 2000))

                    records.append({
                        'zone_id': str(zone),
                        'date': single_date.date(),
                        'hour': int(hour),
                        'average_occupancy': float(occupancy),
                        'traffic_volume': int(volume),
                        'weather_condition': str(weather),
                        'event_day': bool(is_holiday)
                    })

        print(f"✅ Generated {len(records)} historical records")
        return records

# Build one generator and produce the three (reduced-size) test datasets.
generator = DataGenerator()

print("🎯 GENERATING DATASETS...")
parking_data = generator.generate_parking_data(100)      # Reduced for testing
traffic_data = generator.generate_traffic_data(50)       # Reduced for testing
historical_data = generator.generate_historical_data(7)  # Reduced for testing

# Report how many rows each dataset holds.
print("\n📊 GENERATION SUMMARY:")
for summary_label, dataset in (
    ("Parking Records", parking_data),
    ("Traffic Records", traffic_data),
    ("Historical Records", historical_data),
):
    print(f"   {summary_label}: {len(dataset)}")

# =============================================================================
# STEP 5: Load Data to Lakehouse (Fixed Version)
# =============================================================================
print("\n💾 STEP 5: LOADING DATA TO LAKEHOUSE...")

from pyspark.sql.types import (
    StructType, StructField, StringType, IntegerType, DoubleType, BooleanType, TimestampType, DateType
)

# Define schemas explicitly to avoid Delta merge conflicts
parking_schema = StructType([
    StructField("sensor_id", StringType(), True),
    StructField("timestamp", TimestampType(), True),
    StructField("occupancy_status", BooleanType(), True),
    StructField("vehicle_count", IntegerType(), True),
    StructField("temperature", DoubleType(), True),
    StructField("vibration_level", DoubleType(), True),
    StructField("location", StringType(), True),
    StructField("parking_zone", StringType(), True),
    StructField("transaction_hash", StringType(), True)
])

traffic_schema = StructType([
    StructField("camera_id", StringType(), True),
    StructField("timestamp", TimestampType(), True),
    StructField("image_url", StringType(), True),
    StructField("vehicle_count", IntegerType(), True),
    StructField("traffic_density", DoubleType(), True),
    StructField("average_speed", IntegerType(), True),
    StructField("congestion_level", StringType(), True),
    StructField("processed", BooleanType(), True),
    StructField("yolo_results", StringType(), True)
])

historical_schema = StructType([
    StructField("zone_id", StringType(), True),
    StructField("date", DateType(), True),
    StructField("hour", IntegerType(), True),
    StructField("average_occupancy", DoubleType(), True),
    StructField("traffic_volume", IntegerType(), True),
    StructField("weather_condition", StringType(), True),
    StructField("event_day", BooleanType(), True)
])


def _load_table(label, table_name, rows, schema):
    """Overwrite ``table_name`` with ``rows`` and report the stored row count.

    Args:
        label: Short dataset name used in the error message (e.g. "parking").
        table_name: Target Delta table.
        rows: List of dict records to load.
        schema: ``StructType`` applied when building the DataFrame.

    Returns:
        ``(dataframe, row_count)`` on success, ``(None, None)`` when any step
        fails. Failures are printed rather than raised so the remaining
        tables still load.
    """
    try:
        df = spark.createDataFrame(rows, schema=schema)
        df.write.mode("overwrite").format("delta").option("overwriteSchema", "true").saveAsTable(table_name)
        # Count from the table itself to confirm what was actually persisted.
        count = spark.sql(f"SELECT COUNT(*) as cnt FROM {table_name}").collect()[0]['cnt']
        print(f"   ✅ {table_name}: {count} records")
        return df, count
    except Exception as e:
        print(f"❌ Error loading {label} data: {e}")
        return None, None


print("🅿️ Loading parking data...")
parking_df, parking_count = _load_table("parking", "ParkingSensorData", parking_data, parking_schema)

print("🚦 Loading traffic data...")
traffic_df, traffic_count = _load_table("traffic", "TrafficCameraData", traffic_data, traffic_schema)

print("📅 Loading historical data...")
historical_df, historical_count = _load_table("historical", "HistoricalTraffic", historical_data, historical_schema)

# Only claim success when every table actually loaded.
failed_loads = [
    name
    for name, count in (
        ("ParkingSensorData", parking_count),
        ("TrafficCameraData", traffic_count),
        ("HistoricalTraffic", historical_count),
    )
    if count is None
]
if failed_loads:
    print(f"⚠️  Data loading finished with errors in: {', '.join(failed_loads)}")
else:
    print("🎉 Data loading completed successfully!")


# =============================================================================
# STEP 6: Data Quality Validation
# =============================================================================
print("\n🔍 STEP 6: DATA QUALITY VALIDATION...")

# One aggregate query per table; each returns a single row of statistics.
validation_queries = {
    "Parking Data Quality": """
        SELECT 
            COUNT(*) as total_records,
            AVG(CASE WHEN occupancy_status = true THEN 1.0 ELSE 0.0 END) as occupancy_rate,
            COUNT(DISTINCT sensor_id) as unique_sensors,
            COUNT(DISTINCT parking_zone) as zones_covered,
            MIN(timestamp) as earliest_date,
            MAX(timestamp) as latest_date
        FROM ParkingSensorData
    """,
    "Traffic Data Quality": """
        SELECT 
            COUNT(*) as total_records,
            AVG(traffic_density) as avg_density,
            AVG(vehicle_count) as avg_vehicles,
            COUNT(DISTINCT camera_id) as unique_cameras,
            COUNT(DISTINCT congestion_level) as congestion_levels
        FROM TrafficCameraData
    """,
    "Historical Data Quality": """
        SELECT 
            COUNT(*) as total_records,
            AVG(average_occupancy) as avg_occupancy,
            AVG(traffic_volume) as avg_volume,
            COUNT(DISTINCT zone_id) as unique_zones,
            MIN(date) as start_date,
            MAX(date) as end_date
        FROM HistoricalTraffic
    """
}

for check_label, check_sql in validation_queries.items():
    try:
        stats_row = spark.sql(check_sql).collect()[0]
        print(f"\n{check_label}:")
        for column, stat in stats_row.asDict().items():
            # Floats are shown with three decimals; everything else verbatim.
            rendered = f"{stat:.3f}" if isinstance(stat, float) else f"{stat}"
            print(f"   {column}: {rendered}")
    except Exception as check_error:
        # A failing check is reported and the remaining checks still run.
        print(f"❌ Error in {check_label}: {check_error}")

# =============================================================================
# FINAL SUMMARY
# =============================================================================
print("\n" + "="*60)
print("🎉 REIMAGE-AI SMART PARKING - SETUP COMPLETED!")
print("="*60)

# Show final table status: one row count per table visible in the catalog.
print("\n📊 FINAL TABLE STATUS:")
try:
    tables = spark.sql("SHOW TABLES").collect()
    for table in tables:
        try:
            count = spark.sql(f"SELECT COUNT(*) as cnt FROM {table['tableName']}").collect()[0]['cnt']
            print(f"   {table['tableName']}: {count} records")
        except Exception as count_error:
            # Catch Exception (not a bare except) so Ctrl-C still interrupts
            # the cell, and surface the cause instead of hiding it.
            print(f"   {table['tableName']}: [Error counting: {count_error}]")
except Exception as e:
    print(f"❌ Error listing tables: {e}")

print("\n✅ NEXT STEPS:")
print("   1. Run Notebook 2: AI Processing & Predictions")
print("   2. Run Notebook 3: Monitoring & Dashboard")
print("   3. Explore data in Lakehouse tables")

StatementMeta(, f2d71779-6f3b-4a3f-b23b-b561d943aa3e, 6, Finished, Available, Finished)

🚀 REIMAGE-AI SMART PARKING - COMPLETE SETUP
🔧 STEP 1: VERIFYING LAKEHOUSE CONNECTION...
✅ Lakehouse connected! Found 3 tables

🧹 STEP 2: CLEANING UP EXISTING TABLES...
✅ Dropped table: ParkingSensorData
✅ Dropped table: TrafficCameraData
✅ Dropped table: HistoricalTraffic
✅ Dropped table: SensorMaintenanceData

📋 STEP 3: CREATING FRESH TABLES...
✅ Created: ParkingSensorData
✅ Created: TrafficCameraData
✅ Created: HistoricalTraffic
🎉 Tables created successfully!

🚀 STEP 4: GENERATING SYNTHETIC DATA...
🎯 GENERATING DATASETS...
🅿️ Generating 100 parking records...
✅ Generated 100 parking records
🚦 Generating 50 traffic records...
✅ Generated 50 traffic records
📅 Generating 7 days of historical data...
✅ Generated 840 historical records

📊 GENERATION SUMMARY:
   Parking Records: 100
   Traffic Records: 50
   Historical Records: 840

💾 STEP 5: LOADING DATA TO LAKEHOUSE...
🅿️ Loading parking data...
   ✅ ParkingSensorData: 100 records
🚦 Loading traffic data...
   ✅ TrafficCameraData: 50 reco

#### ENHANCED IMPLEMENTATION WITH HEDERA, MCP & POWER BI

In [4]:
# =============================================================
# 🚀 REIMAGE-AI SMART PARKING - WITH HEDERA BLOCKCHAIN
# =============================================================

print("🚀 REIMAGE-AI SMART PARKING - WITH HEDERA BLOCKCHAIN")

# =============================================================
# STEP 1: Verify Lakehouse Connection
# =============================================================
print("🔧 STEP 1: VERIFYING LAKEHOUSE CONNECTION...")

try:
    # Any failure of this catalog query is reported as "no Lakehouse attached".
    catalog_listing = spark.sql("SHOW TABLES")
    tables = catalog_listing.collect()
except Exception:
    print("❌ No Lakehouse attached! Please attach a Lakehouse manually before running.")
    raise
else:
    print(f"✅ Lakehouse connected! Found {len(tables)} tables")


# =============================================================
# STEP 2: Install Required Packages
# =============================================================
print("\n📦 STEP 2: INSTALLING REQUIRED PACKAGES...")

# `%pip` is an IPython line magic, so this cell only runs inside a notebook
# kernel, not as a plain Python script.
# NOTE(review): a failed pip install may not surface as a Python exception
# here, in which case the success message prints regardless — confirm.
# NOTE(review): hedera-sdk-py, python-dotenv and cryptography are installed
# but not imported anywhere in the visible code (Hedera calls are simulated).
try:
    %pip install faker hedera-sdk-py python-dotenv cryptography
    print("✅ Packages installed successfully")
except Exception as e:
    print(f"⚠️  Package installation note: {e}")


# =============================================================
# STEP 3: Initialize Hedera Blockchain Manager
# =============================================================
print("\n⛓️ STEP 3: INITIALIZING HEDERA BLOCKCHAIN INTEGRATION...")

# hashlib/json back the record hashing, datetime/random/Faker back the
# synthetic data generators, and the pyspark wildcard import supplies the
# StructType/StructField/*Type names used by the table schemas in this cell.
import hashlib
import json
from datetime import datetime, date, timedelta
import random
from faker import Faker
from pyspark.sql.types import *

# ---- Custom JSON encoder for datetime ----
class EnhancedJSONEncoder(json.JSONEncoder):
    """JSON encoder that writes ``datetime`` and ``date`` values as ISO-8601 strings."""

    def default(self, obj):
        if isinstance(obj, (datetime, date)):
            return obj.isoformat()
        return super().default(obj)


class HederaBlockchainManager:
    """Simulate anchoring records on Hedera.

    No network call is made: receipts are fabricated locally and every
    transaction is reported as successful. The manager only hashes the
    payload and attaches proof fields to the record.
    """

    # Topic used in receipts when the config does not name one.
    DEFAULT_TOPIC_ID = "0.0.456789"

    def __init__(self):
        # Placeholder test-mode values only; real credentials must not be
        # hard-coded in a notebook.
        self.hedera_config = {
            "testnet_account_id": "0.0.12345",
            "testnet_private_key": "302e...",
            "topic_id": None
        }
        # Per-manager sequence number that keeps transaction ids unique even
        # when many records are stored within the same second.
        self._tx_counter = 0
        print("🔐 Hedera Manager Initialized (Test Mode)")

    def calculate_data_hash(self, data):
        """Return the SHA-256 hex digest of ``data`` serialized as key-sorted JSON."""
        data_str = json.dumps(data, cls=EnhancedJSONEncoder, sort_keys=True)
        return hashlib.sha256(data_str.encode()).hexdigest()

    def simulate_hedera_transaction(self, data, transaction_type="DATA_STORAGE"):
        """Return a fabricated receipt dict for ``data``.

        The receipt carries a transaction id unique within this manager, the
        hash of ``data``, a single timestamp used for both ``timestamp`` and
        ``consensus_timestamp``, and a fixed ``SUCCESS`` status.
        """
        data_hash = self.calculate_data_hash(data)
        # Read the clock once so the id and both timestamps agree.
        now = datetime.now()
        timestamp = now.isoformat()
        self._tx_counter += 1
        receipt = {
            "transaction_id": f"0.0.{int(now.timestamp())}-{self._tx_counter}",
            "data_hash": data_hash,
            "timestamp": timestamp,
            "transaction_type": transaction_type,
            "status": "SUCCESS",
            "consensus_timestamp": timestamp,
            "topic_id": self.hedera_config.get("topic_id") or self.DEFAULT_TOPIC_ID,
            "blockchain_verified": True
        }
        print(f"🔗 Simulated Hedera Transaction {receipt['transaction_id']}")
        return receipt

    def store_on_blockchain(self, data, metadata=None):
        """Attach simulated blockchain proof fields to a copy of ``data``.

        Args:
            data: Record dict; records with a ``sensor_id`` key are tagged
                ``parking_sensor``, all others ``traffic_data``.
            metadata: Optional dict stored alongside the record in the hashed
                payload.

        Returns:
            ``(record_with_proof, receipt)``. On any error the original
            ``data`` is returned unchanged with ``None`` as the receipt.
        """
        try:
            # NOTE(review): the stored data_hash covers this whole wrapper,
            # including storage_timestamp, which is not persisted with the
            # record — so the hash cannot be recomputed from the stored row
            # alone. Confirm whether that is intended.
            transaction_data = {
                "data": data,
                "metadata": metadata or {},
                "storage_timestamp": datetime.now().isoformat(),
                "data_type": "parking_sensor" if "sensor_id" in data else "traffic_data"
            }
            receipt = self.simulate_hedera_transaction(transaction_data)
            # Copy so the caller's record is never mutated.
            data_with_proof = data.copy()
            data_with_proof.update({
                "blockchain_tx_id": receipt["transaction_id"],
                "data_hash": receipt["data_hash"],
                "blockchain_verified": receipt["blockchain_verified"]
            })
            return data_with_proof, receipt
        except Exception as e:
            print(f"❌ Blockchain storage error: {e}")
            return data, None


hedera_manager = HederaBlockchainManager()


# =============================================================
# STEP 4: Create Enhanced Tables with Blockchain Fields
# =============================================================
print("\n📋 STEP 4: CREATING ENHANCED TABLES WITH BLOCKCHAIN FIELDS...")

# NOTE(review): BlockchainTransactions and ModelRegistry are dropped here but
# have no DDL below, so this cell does not recreate them — confirm intended.
tables_to_clean = [
    "ParkingSensorData", "TrafficCameraData", "HistoricalTraffic",
    "BlockchainTransactions", "ModelRegistry", "PowerBI_Metrics"
]
for t in tables_to_clean:
    try:
        spark.sql(f"DROP TABLE IF EXISTS {t}")
        print(f"✅ Dropped table: {t}")
    except Exception as e:
        # Best-effort cleanup: report the cause and keep going. Catching
        # Exception (not a bare except) lets Ctrl-C still interrupt the cell.
        print(f"ℹ️  Could not drop {t}: {e}")

# One CREATE TABLE statement per Delta table, keyed by table name. The sensor
# and camera tables carry the blockchain proof columns added by
# HederaBlockchainManager.store_on_blockchain.
tables_sql = {
    "ParkingSensorData": """
        CREATE TABLE ParkingSensorData (
            sensor_id STRING,
            timestamp TIMESTAMP,
            occupancy_status BOOLEAN,
            vehicle_count INT,
            temperature DOUBLE,
            vibration_level DOUBLE,
            location STRING,
            parking_zone STRING,
            transaction_hash STRING,
            blockchain_tx_id STRING,
            data_hash STRING,
            blockchain_verified BOOLEAN
        ) USING DELTA
    """,
    "TrafficCameraData": """
        CREATE TABLE TrafficCameraData (
            camera_id STRING,
            timestamp TIMESTAMP,
            image_url STRING,
            vehicle_count INT,
            traffic_density DOUBLE,
            average_speed INT,
            congestion_level STRING,
            processed BOOLEAN,
            yolo_results STRING,
            blockchain_tx_id STRING,
            data_hash STRING,
            blockchain_verified BOOLEAN
        ) USING DELTA
    """,
    "HistoricalTraffic": """
        CREATE TABLE HistoricalTraffic (
            zone_id STRING,
            date DATE,
            hour INT,
            average_occupancy DOUBLE,
            traffic_volume INT,
            weather_condition STRING,
            event_day BOOLEAN,
            data_hash STRING
        ) USING DELTA
    """,
    "PowerBI_Metrics": """
        CREATE TABLE PowerBI_Metrics (
            metric_id STRING,
            metric_name STRING,
            metric_value DOUBLE,
            metric_timestamp TIMESTAMP,
            category STRING,
            zone_id STRING,
            data_source STRING
        ) USING DELTA
    """
}

# Creation is deliberately not wrapped in try/except: a failed CREATE aborts
# the cell before any data is generated for a missing table.
for t, sql_stmt in tables_sql.items():
    spark.sql(sql_stmt)
    print(f"✅ Created: {t}")

print("🎉 Enhanced tables created successfully!")


# =============================================================
# STEP 5: Generate Synthetic Data with Blockchain Integration
# =============================================================
print("\n🚀 STEP 5: GENERATING SYNTHETIC DATA WITH BLOCKCHAIN...")

# Module-level Faker instance shared by the generate_* functions in this cell
# (random timestamps and SHA-256 strings).
fake = Faker()

def generate_parking_data(num=100):
    """Return ``num`` parking-sensor records with simulated blockchain proof.

    Each record is timestamped within the last 30 days, is occupied with
    probability 0.7 between 08:00 and 18:59 (0.3 otherwise), and is passed
    through ``hedera_manager.store_on_blockchain`` before being collected.
    """
    locations = ("Downtown", "Airport", "Mall", "Hospital")
    zones = ("ZONE_A", "ZONE_B", "ZONE_C", "ZONE_D")

    def _one_reading():
        # Build a single raw reading and return it with proof fields attached.
        seen_at = fake.date_time_between(start_date="-30d", end_date="now")
        busy_hours = 8 <= seen_at.hour <= 18
        is_occupied = random.random() < (0.7 if busy_hours else 0.3)
        reading = {
            "sensor_id": f"SENSOR_{random.randint(1,50):03d}",
            "timestamp": seen_at,
            "occupancy_status": is_occupied,
            "vehicle_count": 1 if is_occupied else 0,
            "temperature": round(random.uniform(15,35),2),
            "vibration_level": round(random.uniform(0.1,5.0),2),
            "location": random.choice(locations),
            "parking_zone": random.choice(zones),
            "transaction_hash": fake.sha256()
        }
        proofed, _receipt = hedera_manager.store_on_blockchain(reading)
        return proofed

    records = [_one_reading() for _ in range(num)]
    print(f"✅ Generated {len(records)} blockchain-backed parking records")
    return records

def generate_traffic_data(num=50):
    """Return ``num`` traffic-camera records with simulated blockchain proof.

    Rush hours (07-09 and 16-18) yield more vehicles, higher density and
    lower speed than other hours. Each record is passed through
    ``hedera_manager.store_on_blockchain`` before being collected.
    """
    records = []
    for _ in range(num):
        ts = fake.date_time_between(start_date="-30d", end_date="now")
        rush = (7 <= ts.hour <= 9) or (16 <= ts.hour <= 18)
        if rush:
            vehicles = random.randint(20, 50)
            density = round(random.uniform(0.7, 0.95), 2)
            speed = random.randint(20, 40)
        else:
            vehicles = random.randint(5, 25)
            density = round(random.uniform(0.2, 0.6), 2)
            speed = random.randint(40, 60)

        # Rush-hour density starts at 0.70, so the HIGH threshold is
        # inclusive; otherwise a draw that rounds to exactly 0.70 would be
        # labelled MEDIUM.
        if density >= 0.7:
            congestion = "HIGH"
        elif density > 0.4:
            congestion = "MEDIUM"
        else:
            congestion = "LOW"

        camera_id = f"CAM_{random.randint(1, 20):03d}"
        base = {
            "camera_id": camera_id,
            "timestamp": ts,
            # Include the camera id in the path so two cameras capturing at
            # the same second do not share one image URL.
            "image_url": f"https://trafficcams.com/{camera_id}/{ts.strftime('%Y%m%d_%H%M%S')}.jpg",
            "vehicle_count": vehicles,
            "traffic_density": density,
            "average_speed": speed,
            "congestion_level": congestion,
            "processed": True,
            "yolo_results": json.dumps({
                "vehicles_detected": vehicles,
                "confidence": round(random.uniform(0.85, 0.98), 2)
            })
        }
        enhanced, _receipt = hedera_manager.store_on_blockchain(base)
        records.append(enhanced)
    print(f"✅ Generated {len(records)} blockchain-backed traffic records")
    return records

def generate_historical_data(days=7):
    """Return hourly history for four zones over the ``days`` days before now.

    Produces ``4 * days * 24`` records ordered by zone, date, hour.
    ``event_day`` is decided once per calendar date, so every hour and zone
    of the same date carries the same flag.
    """
    records = []
    start = datetime.now() - timedelta(days=days)
    zones = ["ZONE_A", "ZONE_B", "ZONE_C", "ZONE_D"]
    weathers = ["Sunny", "Rainy", "Cloudy"]
    dates = [(start + timedelta(days=d)).date() for d in range(days)]
    # 5% chance that a given date is an event day, rolled once per date.
    event_flags = [random.random() < 0.05 for _ in dates]

    for zone in zones:
        for day, is_event_day in zip(dates, event_flags):
            for hour in range(24):
                # Off-peak hours run at 40% of the drawn occupancy.
                occ = round(random.uniform(0.3, 0.95) * (1 if 8 <= hour <= 18 else 0.4), 3)
                vol = int(occ * random.randint(500, 2000))
                weather = random.choice(weathers)
                record = {
                    "zone_id": zone,
                    "date": day,
                    "hour": hour,
                    "average_occupancy": occ,
                    "traffic_volume": vol,
                    "weather_condition": weather,
                    "event_day": is_event_day,
                    # NOTE(review): this hashes only the zone/date/hour key,
                    # not the measured values, so it cannot detect later edits
                    # to occupancy or volume — confirm that is intended.
                    "data_hash": hedera_manager.calculate_data_hash({"z": zone, "d": str(day), "h": hour})
                }
                records.append(record)
    print(f"✅ Generated {len(records)} historical records")
    return records

# Generate the three datasets using each generator's default size, spelled out.
parking_data = generate_parking_data(num=100)
traffic_data = generate_traffic_data(num=50)
historical_data = generate_historical_data(days=7)


# =============================================================
# STEP 6: Load Data into Lakehouse
# =============================================================
print("\n💾 STEP 6: LOADING ENHANCED DATA TO LAKEHOUSE...")


def _schema_of(*columns):
    """Build a StructType from ``(column_name, spark_type)`` pairs."""
    return StructType([StructField(col_name, col_type) for col_name, col_type in columns])


# Column layouts mirror the CREATE TABLE statements for the enhanced tables.
parking_schema = _schema_of(
    ("sensor_id", StringType()),
    ("timestamp", TimestampType()),
    ("occupancy_status", BooleanType()),
    ("vehicle_count", IntegerType()),
    ("temperature", DoubleType()),
    ("vibration_level", DoubleType()),
    ("location", StringType()),
    ("parking_zone", StringType()),
    ("transaction_hash", StringType()),
    ("blockchain_tx_id", StringType()),
    ("data_hash", StringType()),
    ("blockchain_verified", BooleanType()),
)
traffic_schema = _schema_of(
    ("camera_id", StringType()),
    ("timestamp", TimestampType()),
    ("image_url", StringType()),
    ("vehicle_count", IntegerType()),
    ("traffic_density", DoubleType()),
    ("average_speed", IntegerType()),
    ("congestion_level", StringType()),
    ("processed", BooleanType()),
    ("yolo_results", StringType()),
    ("blockchain_tx_id", StringType()),
    ("data_hash", StringType()),
    ("blockchain_verified", BooleanType()),
)
hist_schema = _schema_of(
    ("zone_id", StringType()),
    ("date", DateType()),
    ("hour", IntegerType()),
    ("average_occupancy", DoubleType()),
    ("traffic_volume", IntegerType()),
    ("weather_condition", StringType()),
    ("event_day", BooleanType()),
    ("data_hash", StringType()),
)

# Overwrite each Delta table in turn; any failure propagates and stops the cell.
for rows, row_schema, target_table in (
    (parking_data, parking_schema, "ParkingSensorData"),
    (traffic_data, traffic_schema, "TrafficCameraData"),
    (historical_data, hist_schema, "HistoricalTraffic"),
):
    frame = spark.createDataFrame(rows, row_schema)
    delta_writer = frame.write.mode("overwrite").format("delta").option("overwriteSchema", "true")
    delta_writer.saveAsTable(target_table)

print("🎉 Enhanced data loading completed!")


# =============================================================
# STEP 7: Generate Power BI Metrics
# =============================================================
print("\n📊 STEP 7: GENERATING POWER BI METRICS...")


def safe_float(v):
    """Return ``v`` converted to float, or 0.0 when ``v`` is falsy (None, 0, "")."""
    if not v:
        return 0.0
    return float(v)

def generate_powerbi_metrics():
    """Return blockchain summary metrics computed from ParkingSensorData.

    Yields two metric dicts (total record count and verification rate) stamped
    with the current time. If the query or conversion fails, the error is
    printed and whatever was collected so far is returned.
    """
    collected = []
    generated_at = datetime.now()

    def _blockchain_metric(metric_id, metric_name, raw_value):
        # All metrics from this function share category, zone and source.
        return {"metric_id": metric_id, "metric_name": metric_name,
                "metric_value": safe_float(raw_value), "metric_timestamp": generated_at,
                "category": "Blockchain", "zone_id": "ALL", "data_source": "Hedera"}

    try:
        summary = spark.sql("""
            SELECT COUNT(*) as total,
                   SUM(CASE WHEN blockchain_verified THEN 1 ELSE 0 END) as verified,
                   AVG(CASE WHEN blockchain_verified THEN 1.0 ELSE 0.0 END) as rate
            FROM ParkingSensorData
        """).collect()[0]
        collected.append(
            _blockchain_metric("blk_total", "Total Blockchain Records", summary["total"])
        )
        collected.append(
            _blockchain_metric("blk_rate", "Blockchain Verification Rate", summary["rate"])
        )
    except Exception as metric_error:
        print(f"❌ Metric generation error: {metric_error}")
    return collected

metrics = generate_powerbi_metrics()
if metrics:
    # Use an explicit schema matching the PowerBI_Metrics DDL instead of
    # inferring one from dicts, and allow schema overwrite like the other
    # table loads in this cell, so column order and types stay predictable.
    metrics_schema = StructType([
        StructField("metric_id", StringType()),
        StructField("metric_name", StringType()),
        StructField("metric_value", DoubleType()),
        StructField("metric_timestamp", TimestampType()),
        StructField("category", StringType()),
        StructField("zone_id", StringType()),
        StructField("data_source", StringType()),
    ])
    spark.createDataFrame(metrics, metrics_schema)\
         .write.mode("overwrite").format("delta").option("overwriteSchema", "true").saveAsTable("PowerBI_Metrics")
    print(f"✅ Generated {len(metrics)} Power BI metrics")
else:
    # Make the skipped write visible instead of silently doing nothing.
    print("⚠️  No Power BI metrics were generated; PowerBI_Metrics was not updated")

# =============================================================
# FINAL SUMMARY
# =============================================================
divider = "=" * 60
print("\n" + divider)
print("🎉 REIMAGE-AI SMART PARKING WITH HEDERA - SETUP COMPLETED!")
print(divider)

# Report the row count of each table written by this cell; a missing table
# raises and stops the cell.
for summary_table in ("ParkingSensorData", "TrafficCameraData", "HistoricalTraffic", "PowerBI_Metrics"):
    count_row = spark.sql(f"SELECT COUNT(*) as cnt FROM {summary_table}").collect()[0]
    row_total = count_row['cnt']
    print(f"   {summary_table}: {row_total} records")

print("\n✅ NEXT STEPS:")
for next_step in (
    "   1. Run Notebook 2 - AI Processing & MCP Integration",
    "   2. Run Notebook 3 - Power BI Dashboard & Monitoring",
):
    print(next_step)


StatementMeta(, d0b4adbe-9011-4d4d-acd3-20483e4fe494, 23, Finished, Available, Finished)


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.
🚀 REIMAGE-AI SMART PARKING - WITH HEDERA BLOCKCHAIN
🔧 STEP 1: VERIFYING LAKEHOUSE CONNECTION...
✅ Lakehouse connected! Found 11 tables

📦 STEP 2: INSTALLING REQUIRED PACKAGES...
✅ Packages installed successfully

⛓️ STEP 3: INITIALIZING HEDERA BLOCKCHAIN INTEGRATION...
🔐 Hedera Manager Initialized (Test Mode)

📋 STEP 4: CREATING ENHANCED TABLES WITH BLOCKCHAIN FIELDS...
✅ Dropped table: ParkingSensorData
✅ Dropped table: TrafficCameraData
✅ Dropped table: HistoricalTraffic
✅ Dropped table: BlockchainTransactions
✅ Dropped table: ModelRegistry
✅ Dropped table: PowerBI_Metrics
✅ Created: ParkingSensorData
✅ Created: TrafficCameraData
✅ Created: HistoricalTraffic
✅ Crea