In [4]:
%pip install numpy pandas


Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.0 -> 24.3.1
[notice] To update, run: C:\Users\Richi\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


In [24]:
import numpy as np
import pandas as pd
import random
import os

# Define energy consumption profiles based on more detailed research
def generate_user_types():
    user_types = {
        'Restaurant_AllDay': {
            'baseline_per_sqft': 0.003,  # Refrigeration and overnight use
            'prep_multiplier': 0.6,  # Medium energy use during prep time
            'peak_multiplier': 1.5,  # Highest energy use during service times
            'size_range': (2000, 5000),  # Square footage for small to large restaurants
            'service_hours': [(7, 9), (12, 14), (18, 21)],  # Breakfast, lunch, dinner
            'prep_hours': [(6, 7), (11, 12), (17, 18)],  # Prep time before service
            'closed_hours': [(0, 6), (22, 24)],  # Overnight low energy
        },
        'Restaurant_LunchDinner': {
            'baseline_per_sqft': 0.0025,
            'prep_multiplier': 0.5,
            'peak_multiplier': 1.7,
            'size_range': (1500, 4000),
            'service_hours': [(11, 14), (18, 21)],
            'prep_hours': [(10, 11), (17, 18)],
            'closed_hours': [(0, 10), (21, 24)],
        },
        'Restaurant_DinnerOnly': {
            'baseline_per_sqft': 0.002,
            'prep_multiplier': 0.4,
            'peak_multiplier': 1.8,
            'size_range': (1000, 3000),
            'service_hours': [(18, 21)],
            'prep_hours': [(12, 18)],
            'closed_hours': [(0, 12), (21, 24)],
        },
        'Hotel': {
            'baseline_per_sqft': 0.004,  # HVAC and baseline lighting
            'prep_multiplier': 0.7,  # Increased use during cleaning and preparation hours
            'peak_multiplier': 1.3,  # Higher use during peak occupancy hours
            'size_range': (10000, 50000),  # Small to large hotels
            'service_hours': [(6, 9), (18, 23)],  # Morning and evening peaks
            'prep_hours': [(9, 18)],  # Cleaning and prep time
            'closed_hours': [(0, 6)],  # Overnight low energy
        },
    }
    return user_types

# Generate a daily energy curve with detailed ramping
def generate_daily_curve(user_type, size, day_of_week, season_factor):
    baseline = user_type['baseline_per_sqft'] * size
    daily_curve = np.ones(48) * baseline  # Start with refrigeration-only baseline

    for start, end in user_type['prep_hours']:
        start_slot = int(start * 2)
        end_slot = int(end * 2)
        ramp_curve = np.linspace(1, user_type['prep_multiplier'], end_slot - start_slot)
        daily_curve[start_slot:end_slot] *= ramp_curve

    for start, end in user_type['service_hours']:
        start_slot = int(start * 2)
        end_slot = int(end * 2)
        ramp_curve = np.linspace(user_type['prep_multiplier'], user_type['peak_multiplier'], (end_slot - start_slot) // 2)
        ramp_curve = np.concatenate((ramp_curve, ramp_curve[::-1]))
        daily_curve[start_slot:end_slot] *= ramp_curve

    for start, end in user_type['closed_hours']:
        start_slot = int(start * 2)
        end_slot = int(end * 2)
        daily_curve[start_slot:end_slot] *= 0.5  # Overnight energy use reduced

    if day_of_week >= 5:  # Weekend adjustment
        daily_curve *= 0.9

    daily_curve *= (1 + season_factor)  # Add seasonality

    return daily_curve

# Generate customer data
def generate_customer_data(user_types, num_customers, random_variation=0.1):
    all_customers_data = []
    customer_types = []
    np.random.seed(42)

    monday_closures = random.sample(range(num_customers), 5)  # Randomly close 5 restaurants on Mondays
    closed_days = {customer_id: random.sample(range(365), random.randint(2, 5)) for customer_id in range(num_customers)}

    for customer_id in range(num_customers):
        if customer_id < 10:
            user_type_name = 'Hotel'
        else:
            user_type_name = random.choice([key for key in user_types.keys() if key != 'Hotel'])

        user_type = user_types[user_type_name]
        size = random.randint(*user_type['size_range'])
        size *= random.uniform(0.9, 1.1)  # Random scaling within ±10%

        customer_profile = []
        for day in range(365):
            season_factor = np.sin(2 * np.pi * day / 365) * 0.2
            day_of_week = day % 7

            if customer_id in closed_days and day in closed_days[customer_id]:
                # Business closed for the day, only refrigeration
                daily_curve = np.ones(48) * user_type['baseline_per_sqft'] * size * 0.5
            elif customer_id in monday_closures and day_of_week == 0:
                # Business closed on Mondays
                daily_curve = np.ones(48) * user_type['baseline_per_sqft'] * size * 0.5
            else:
                daily_curve = generate_daily_curve(user_type, size, day_of_week, season_factor)

            # Add random variation
            daily_curve *= (1 + np.random.normal(0, random_variation, 48))
            customer_profile.extend(daily_curve)

        all_customers_data.append(customer_profile)
        customer_types.append(f"{user_type_name.replace('_', ' ')} {customer_id + 1}")

    all_customers_data = np.array(all_customers_data)
    index = pd.date_range(start='2022-01-01', end='2022-12-31 23:30', freq='30T')

    return pd.DataFrame(all_customers_data.T, index=index, columns=customer_types)

# Calculate total kWh for all customers
def calculate_total_kwh(data):
    total_kwh = data.sum().sum()
    print(f"Total kWh for all customers combined: {total_kwh:.2f} kWh")
    return total_kwh

# Main function
def main():
    user_types = generate_user_types()
    num_customers = 50
    random_variation = 0.1

    customer_data = generate_customer_data(user_types, num_customers, random_variation)

    output_dir = 'output'
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    output_path = os.path.join(output_dir, 'realistic_synthetic_customer_data.csv')
    customer_data.to_csv(output_path)
    print(f"Synthetic data generated and saved to '{output_path}'")

    # Calculate and display total kWh
    calculate_total_kwh(customer_data)

if __name__ == "__main__":
    main()


  index = pd.date_range(start='2022-01-01', end='2022-12-31 23:30', freq='30T')


Synthetic data generated and saved to 'output\realistic_synthetic_customer_data.csv'
Total kWh for all customers combined: 18454613.94 kWh
