In [1]:
import os
import random
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime, timedelta

In [2]:
def scaled_normal(x, mu, sigma, min_val, max_val):
    """Evaluate a Gaussian bump over ``x`` and rescale it to ``[min_val, max_val]``.

    The unnormalised Gaussian ``exp(-0.5 * ((x - mu) / sigma) ** 2)`` is
    min-max normalised over the supplied points, so the smallest sample maps
    to ``min_val`` and the largest to ``max_val``.

    Parameters
    ----------
    x : array-like
        Points at which the curve is evaluated (ndarray, list or scalar).
    mu, sigma : float
        Centre and width of the Gaussian.
    min_val, max_val : float
        Output range after rescaling.

    Returns
    -------
    numpy.ndarray
        Rescaled curve with the same shape as ``x``. If every sample of the
        Gaussian has the same value (e.g. a single point), there is no range
        to normalise and a flat profile at ``min_val`` is returned.
    """
    x = np.asarray(x, dtype=float)
    pdf = np.exp(-0.5 * ((x - mu) / sigma) ** 2)

    lowest = np.min(pdf)
    span = np.max(pdf) - lowest
    if span == 0:
        # Degenerate input: dividing by the zero range would yield 0/0 -> NaN.
        return np.full_like(pdf, min_val, dtype=float)

    scaled = (pdf - lowest) / span
    return scaled * (max_val - min_val) + min_val

In [3]:
def smooth_plateau_profile(
    hours,
    start=9,
    end=17,
    rise_std=1.5,
    fall_std=1.5,
    min_val=0.2,
    max_val=1.4,
    noise_std=0.05,
    seed=None
):
    """Build a plateau-shaped profile with smooth tanh edges and scaled noise.

    Parameters
    ----------
    hours : numpy.ndarray
        Points at which the profile is evaluated (its ``.shape`` is used to
        size the noise vector).
    start, end : float
        Centres of the rising and falling edges.
    rise_std, fall_std : float
        Width of the rising / falling tanh transition.
    min_val, max_val : float
        Baseline level and the level reached where the envelope equals 1.
    noise_std : float
        Standard deviation of the Gaussian noise before it is scaled by the
        envelope.
    seed : int or None
        When given, NumPy's global random state is reseeded with it before the
        noise is drawn.

    Returns
    -------
    numpy.ndarray
        ``min_val + (max_val - min_val) * envelope + noise * envelope``.
    """
    if seed is not None:
        np.random.seed(seed)

    # Sigmoid-like ramps in [0, 1]; their product forms the plateau envelope.
    ramp_up = 0.5 * (1 + np.tanh((hours - start) / rise_std))
    ramp_down = 0.5 * (1 - np.tanh((hours - end) / fall_std))
    envelope = ramp_up * ramp_down

    jitter = np.random.normal(0, noise_std, size=hours.shape)

    # The noise is weighted by the envelope, so it fades out away from the
    # plateau instead of being applied uniformly.
    return min_val + (max_val - min_val) * envelope + jitter * envelope

In [4]:
def get_temperature():
    """Return a noisy 24-sample temperature curve for one day.

    The daily minimum is drawn from ``random.randint(10, 17)`` and the daily
    maximum from ``random.randint(21, 32)``; a Gaussian bump centred on hour 14
    (sigma 4) is stretched between the two and Gaussian noise with standard
    deviation 0.3 is added on top.

    Returns
    -------
    numpy.ndarray
        One value per hour index 0..23.
    """
    hour_grid = np.arange(24)

    # Keep the draw order (low, then high) so a seeded `random` module yields
    # the same pair.
    daily_low = random.randint(10, 17)
    daily_high = random.randint(21, 32)

    curve = scaled_normal(
        hour_grid, mu=14, sigma=4, min_val=daily_low, max_val=daily_high
    )
    jitter = np.random.normal(0, 0.3, size=hour_grid.shape)
    return curve + jitter

In [5]:
def get_humidity(seed=None):
    """Return a noisy 24-sample humidity curve for one day, clipped to [20, 100].

    Parameters
    ----------
    seed : int or None
        When given, both NumPy's global random state and the ``random`` module
        are reseeded with it before any value is drawn.

    Returns
    -------
    numpy.ndarray
        One value per hour index 0..23.
    """
    if seed is not None:
        np.random.seed(seed)
        random.seed(seed)

    hour_grid = np.arange(24)

    # Draw order is peak hour, lower bound, upper bound.
    peak_hour = random.choice([13, 14, 15, 16])
    lower_bound = random.uniform(30, 40)
    upper_bound = random.uniform(55, 65)

    curve = scaled_normal(
        hour_grid, mu=peak_hour, sigma=5, min_val=lower_bound, max_val=upper_bound
    )
    noisy_curve = curve + np.random.normal(0, 0.5, size=hour_grid.shape)

    return np.clip(noisy_curve, 20, 100)

In [6]:
def get_co2(seed=None):
    """Return a noisy 24-sample CO2 plateau curve for one day.

    The rising edge is centred on hour 8 or 9, the falling edge on hour 17 or
    18, and the plateau level is an integer in 650..799 (``np.random.randint``
    excludes its upper bound). The baseline is 420 (presumably ppm - confirm).

    Parameters
    ----------
    seed : int or None
        When given, NumPy's global random state is reseeded with it *before*
        the edge positions and plateau level are drawn, so the whole curve
        (shape parameters and noise) is reproducible for a given seed.

    Returns
    -------
    numpy.ndarray
        One value per hour index 0..23.
    """
    if seed is not None:
        # Seeding must happen before the randint draws below; otherwise only
        # the noise would be reproducible and the shape parameters would not.
        np.random.seed(seed)

    hours = np.arange(24)
    return smooth_plateau_profile(
        hours,
        start=np.random.randint(8, 10),
        end=np.random.randint(17, 19),
        rise_std=1.8,
        fall_std=1.8,
        min_val=420,
        max_val=np.random.randint(650, 800),
        noise_std=5,
        # Already seeded above; reseeding here would replay the random numbers
        # just consumed by the parameter draws.
        seed=None
    )

In [7]:
def get_electricity(seed=None, holiday=False):
    """Return a noisy 24-sample electricity plateau curve for one day.

    The rising edge is centred on hour 8 or 9 and the falling edge on hour 17
    or 18. The plateau level is drawn uniformly (rounded to 2 decimals) from
    1.2-1.6 when ``holiday`` is true and from 0.6-1.0 otherwise; the baseline
    is 0.2.

    NOTE(review): holidays get the *higher* peak range. That is kept exactly
    as in the original code - confirm that this is the intended direction.

    Parameters
    ----------
    seed : int or None
        When given, NumPy's global random state is reseeded with it *before*
        the edge positions and plateau level are drawn, so the whole curve
        (shape parameters and noise) is reproducible for a given seed.
    holiday : bool
        Selects the range from which the plateau level is drawn.

    Returns
    -------
    numpy.ndarray
        One value per hour index 0..23.
    """
    if seed is not None:
        # Seeding must happen before the draws below; otherwise only the noise
        # would be reproducible and the shape parameters would not.
        np.random.seed(seed)

    hours = np.arange(24)
    # The two day types differ only in the range of the plateau level.
    peak_low, peak_high = (1.2, 1.6) if holiday else (0.6, 1.0)

    return smooth_plateau_profile(
        hours,
        start=np.random.randint(8, 10),
        end=np.random.randint(17, 19),
        rise_std=1.5,
        fall_std=1.5,
        min_val=0.2,
        max_val=round(np.random.uniform(peak_low, peak_high), 2),
        noise_std=0.02,
        # Already seeded above; reseeding here would replay the random numbers
        # just consumed by the parameter draws.
        seed=None
    )


In [8]:
def generate_and_save_per_room_data(
    n_floors=4, rooms_per_floor=19, days=356, start_time="2025-01-01 00:00", output_dir="rooms", seed=42
):
    """Generate hourly synthetic sensor data and write one CSV file per room.

    For every room ``F{floor}_R{room}`` a file ``<output_dir>/<room_id>.csv``
    is written with the columns ``timestamp, room_id, area, num_windows,
    window_area, hour, day, month, temperature, humidity, co2, electricity``.
    All rooms cover the same period: ``days`` consecutive 24-hour blocks
    beginning at ``start_time``.

    Parameters
    ----------
    n_floors, rooms_per_floor : int
        Building layout; ``n_floors * rooms_per_floor`` files are produced.
    days : int
        Number of 24-hour blocks per room.
        NOTE(review): the default is 356, possibly a typo for 365; left
        unchanged to keep the interface stable.
    start_time : str
        First timestamp, in ``"%Y-%m-%d %H:%M"`` format.
    output_dir : str
        Directory for the CSV files (created if missing).
    seed : int or None
        Seeds NumPy's global random state and the ``random`` module once, so a
        full run is reproducible while individual days still differ.
    """
    # Seed both generators exactly once. Handing the seed to the per-day
    # helpers would reset the random streams every day and make all days
    # identical.
    np.random.seed(seed)
    random.seed(seed)
    os.makedirs(output_dir, exist_ok=True)

    hours_per_day = 24
    series_start = datetime.strptime(start_time, "%Y-%m-%d %H:%M")

    for floor in range(n_floors):
        for room in range(rooms_per_floor):
            room_id = f"F{floor+1}_R{room+1}"

            # Room geometry is a fixed property of the room: draw it once, not
            # once per day.
            area = np.random.randint(12, 35)  # square meters
            num_windows = np.random.randint(1, 5)
            window_area = np.round(num_windows * np.random.uniform(0.5, 1.5), 2)

            rows = []
            for day_index in range(days):
                # Every room restarts from series_start so all files share the
                # same time axis.
                day_start = series_start + timedelta(days=day_index)

                # Weekend detection uses the real calendar weekday
                # (Saturday=5, Sunday=6) rather than the day offset, which is
                # only correct when the series happens to start on a Monday.
                holiday = day_start.weekday() >= 5

                temperatures = get_temperature()
                humidities = get_humidity()
                co2s = get_co2()
                electricities = get_electricity(holiday=holiday)

                for hour_offset in range(hours_per_day):
                    ts = day_start + timedelta(hours=hour_offset)
                    # The daily profiles are indexed by hour of day.
                    hour = ts.hour

                    rows.append({
                        "timestamp": ts,
                        "room_id": room_id,
                        "area": area,
                        "num_windows": num_windows,
                        "window_area": window_area,
                        "hour": str(hour),
                        "day": str(ts.day),
                        "month": str(ts.month),
                        "temperature": round(temperatures[hour], 2),
                        "humidity": round(humidities[hour], 2),
                        "co2": round(co2s[hour], 2),
                        "electricity": round(electricities[hour], 3)
                    })

            df_room = pd.DataFrame(rows)
            filename = os.path.join(output_dir, f"{room_id}.csv")
            df_room.to_csv(filename, index=False)

In [9]:
# Run the generator with its default arguments; this writes one CSV per room
# into the "rooms" directory (created if it does not exist).
generate_and_save_per_room_data()