In [0]:
import json
import time
import random
from datetime import datetime
from pyspark.sql.functions import lit
from pyspark.sql import SparkSession

# Initialize Spark Session (if not already in a Databricks notebook).
# getOrCreate() returns the already-active session when one exists, otherwise
# it builds a new one named "PowerUsageSimulator".
spark = SparkSession.builder.appName("PowerUsageSimulator").getOrCreate()

# Configuration
# Destination location for the generated JSON batches (note the trailing "/").
output_path = "/Volumes/na-dbxtraining/biju_raw/biju_vol/powerdata/raw_data/"
# Upper bound (inclusive) of the numeric suffix used for both customer IDs and meter names.
num_customers = 50
# Lower and upper bounds passed to random.uniform() for each simulated kWh reading.
min_kwh = 0.05
max_kwh = 5.5
interval_seconds = 10 # Generate a new batch of data every 10 seconds

def generate_power_reading():
    """Build one simulated smart-meter reading.

    The customer number and the meter number are drawn independently, each
    uniformly from 1..num_customers (inclusive), and rendered as zero-padded
    five-digit IDs (CUST00001, MTR00001, ...). The kWh value is drawn
    uniformly between min_kwh and max_kwh and rounded to three decimals; the
    timestamp is the current naive local time in ISO 8601 form.

    Returns:
        dict with the keys customer_id, metername, kwh_reading, timestamp
        and raw_source.
    """
    # Keep the draw order (customer, meter, kWh) so seeded runs stay reproducible.
    customer_no = random.randint(1, num_customers)
    meter_no = random.randint(1, num_customers)
    usage_kwh = round(random.uniform(min_kwh, max_kwh), 3)
    taken_at = datetime.now().isoformat()

    reading = dict(
        customer_id="CUST{:05d}".format(customer_no),
        metername="MTR{:05d}".format(meter_no),
        kwh_reading=usage_kwh,
        timestamp=taken_at,
        raw_source="simulated_smart_meter",
    )
    return reading

def write_data_to_file():
    """Generate one batch of simulated readings and write it out as JSON.

    A batch holds between 5 and 20 readings (inclusive), each produced by
    generate_power_reading(). The batch is converted to a Spark DataFrame and
    written with mode="overwrite" to
    ``<output_path>/power_readings_<YYYYmmddHHMMSS>.json``.

    Note that Spark treats the target as an output location (typically a
    directory of part files), not as a single JSON document.

    Returns:
        None. The written location is reported with print().
    """
    # Generate a random number of readings for each batch
    readings = [generate_power_reading() for _ in range(random.randint(5, 20))]

    # Create a temporary DataFrame
    df = spark.createDataFrame(readings)

    # Name the target after the current wall-clock second.
    current_time = datetime.now().strftime("%Y%m%d%H%M%S")
    file_name = f"power_readings_{current_time}.json"
    # output_path may already end with "/"; strip trailing separators before
    # joining so the written and logged path never contains "//".
    full_path = f"{output_path.rstrip('/')}/{file_name}"

    df.write.json(full_path, mode="overwrite")
    print(f"Generated {len(readings)} new readings to {full_path}")
    
# Main loop to continuously generate data.
# There is no exit condition: the loop runs until the cell is interrupted, so
# any cells placed after this one will not execute while it is running.
while True:
    try:
        write_data_to_file()
        # Pause between successful batches.
        time.sleep(interval_seconds)
    except Exception as e:
        # Best-effort: report the failure and keep the generator alive.
        # KeyboardInterrupt is not an Exception subclass, so a manual
        # interrupt is not swallowed here and still stops the loop.
        print(f"An error occurred: {e}")
        time.sleep(60) # Wait before retrying

In [0]:
%sql
-- All rows and columns of the bronze plans table.
SELECT *
FROM `na-dbxtraining`.biju_bronze.bronze_plans

In [0]:
%sql
-- All rows and columns of the bronze customers table.
SELECT *
FROM `na-dbxtraining`.biju_bronze.bronze_customers

In [0]:
%sql
-- All rows and columns of the bronze meter readings table.
SELECT *
FROM `na-dbxtraining`.biju_bronze.bronze_meter_readings

In [0]:
%sql
-- All rows and columns of the silver enriched meter readings table.
SELECT *
FROM `na-dbxtraining`.biju_silver.silver_enriched_meter_readings

In [0]:
%sql
-- All rows and columns of the gold daily customer kWh summary table.
SELECT *
FROM `na-dbxtraining`.biju_gold.gold_daily_customer_kwh_summary

In [0]:
%sql
-- All rows and columns of the gold daily customer kWh summary table.
-- NOTE(review): this runs the same query as the preceding cell; confirm
-- whether a different table was intended or the cell can be dropped.
SELECT *
FROM `na-dbxtraining`.biju_gold.gold_daily_customer_kwh_summary