# Merged daily mean: mocked simulations

In [6]:
import pandas as pd
import numpy as np

# Parameters
input_file = 'output/results/merged_daily_mean.csv'  # Path to your input CSV
output_file = 'output/results/merged_daily_mean_mocked.csv'  # Path for the output CSV
num_new_buildings = 5  # Number of new BuildingIDs to generate
fluctuation_percentage = 0.2  # ±20%
random_seed = None  # Set to an int to make the generated mock file reproducible

# Read the existing CSV
df = pd.read_csv(input_file)

# Coerce 'BuildingID' to numbers; values that cannot be parsed become NaN and
# are rejected right below instead of silently turning into bogus IDs.
df['BuildingID'] = pd.to_numeric(df['BuildingID'], errors='coerce')
if df['BuildingID'].isnull().any():
    raise ValueError("Some BuildingID values could not be converted to numbers. Please check your data.")
df['BuildingID'] = df['BuildingID'].astype(int)

# New BuildingIDs continue directly after the largest existing one
existing_buildings = df['BuildingID'].unique()
max_building_id = existing_buildings.max()
new_building_ids = range(max_building_id + 1, max_building_id + 1 + num_new_buildings)

# Identify date columns (assuming they start from the third column)
date_columns = df.columns[2:]

# Build one mocked copy of the whole table per new BuildingID.
# A full matrix of random factors is drawn per building and multiplied in one
# vectorized step, instead of visiting every cell in nested Python loops.
# NaN cells stay NaN because NaN * factor is NaN.
rng = np.random.default_rng(random_seed)
date_column_list = list(date_columns)
original_values = df[date_column_list]

mock_frames = []
for new_id in new_building_ids:
    # Independent factor per cell, uniform in [1 - pct, 1 + pct)
    factors = rng.uniform(
        1 - fluctuation_percentage,
        1 + fluctuation_percentage,
        size=original_values.shape,
    )
    mock_frame = df.copy()
    mock_frame['BuildingID'] = new_id
    mock_frame[date_column_list] = original_values * factors
    mock_frames.append(mock_frame)

# All mocked rows together; an empty frame when no new buildings were requested
new_df = pd.concat(mock_frames, ignore_index=True) if mock_frames else df.iloc[0:0]

# Append the new rows to the original DataFrame
combined_df = pd.concat([df, new_df], ignore_index=True)

# Sort the DataFrame by BuildingID and VariableName
combined_df = combined_df.sort_values(by=['BuildingID', 'VariableName'])

# Save to a new CSV
combined_df.to_csv(output_file, index=False)

print(f"Mock data generated and saved to {output_file}")


Mock data generated and saved to output/results/merged_daily_mean_mocked.csv


In [13]:
import pandas as pd
import numpy as np

# --- Configuration ------------------------------------------------------------
# Source table and destination of the enlarged, mocked table
input_file = r'D:\Documents\E_Plus_2030_py\output\results\merged_daily_mean.csv'
output_file = r'D:\Documents\E_Plus_2030_py\output\results\merged_daily_mean_mocked.csv'
# How many additional BuildingIDs to fabricate
num_new_buildings = 200
# Relative half-width of the random fluctuation (0.2 means ±20%)
fluctuation_percentage = 0.2

# --- 1. Load the source table -------------------------------------------------
df = pd.read_csv(input_file)

# VariableName may carry leading/trailing blanks in the CSV; normalise it so
# later comparisons and sorting are not affected by stray whitespace.
df['VariableName'] = df['VariableName'].astype(str).str.strip()

# --- 2. Month abbreviation -> two-digit month number --------------------------
month_mapping = {
    abbreviation: f"{position:02d}"
    for position, abbreviation in enumerate(
        ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
         'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'),
        start=1,
    )
}

# --- 3. Date columns (everything from the third column onwards) ---------------
date_columns = df.columns[2:]

# 4. Function to convert 'DD-MMM' to 'MM/DD'
def convert_date_format(date_str, mapping=None):
    """Convert a 'DD-MMM' label (e.g. '01-Jan') into 'MM/DD' (e.g. '01/01').

    Parameters
    ----------
    date_str : str
        Label to convert, typically a column name.
    mapping : dict, optional
        Month abbreviation -> two-digit month string. When omitted, the
        module-level ``month_mapping`` is used.

    Returns
    -------
    The converted label. ``date_str`` is returned unchanged when it is not a
    string or does not look like ``<digits>-<known month abbreviation>``, so
    unrelated labels are never turned into a bogus '00/..' name.
    """
    if mapping is None:
        mapping = month_mapping

    # Non-string labels (e.g. integer column names) cannot be dates in this format
    if not isinstance(date_str, str):
        return date_str

    day, separator, month_abbr = date_str.strip().partition('-')
    day = day.strip()
    month_abbr = month_abbr.strip()

    # Accept 'jan' / 'JAN' as well as 'Jan'
    month_num = mapping.get(month_abbr, mapping.get(month_abbr.capitalize()))

    if not separator or month_num is None or not (day.isascii() and day.isdigit()):
        # Format does not match: leave the label exactly as it was
        return date_str

    # Zero-pad so that '1-Jan' and '01-Jan' both become '01/01'
    return f"{month_num}/{day.zfill(2)}"

# 5. Rename the date columns ('DD-MMM' -> 'MM/DD')
new_date_columns = [convert_date_format(col) for col in date_columns]
rename_dict = dict(zip(date_columns, new_date_columns))
df = df.rename(columns=rename_dict)

# Update date_columns to the new names
date_columns = new_date_columns

# 6. Ensure 'BuildingID' is numeric; unparseable values become NaN ...
df['BuildingID'] = pd.to_numeric(df['BuildingID'], errors='coerce')

# 7. ... and are rejected here instead of turning into bogus IDs
if df['BuildingID'].isnull().any():
    raise ValueError("Some BuildingID values could not be converted to numbers. Please check your data.")

# 8. Store the IDs as integers
df['BuildingID'] = df['BuildingID'].astype(int)

# 9. Identify existing BuildingIDs
existing_buildings = df['BuildingID'].unique()
max_building_id = existing_buildings.max()

# 10. New BuildingIDs continue directly after the largest existing one
new_building_ids = range(max_building_id + 1, max_building_id + 1 + num_new_buildings)

# 11. Build one mocked copy of the whole table per new BuildingID.
# A full matrix of random factors is drawn per building and multiplied in one
# vectorized step, instead of visiting every cell in nested Python loops.
# NaN cells stay NaN because NaN * factor is NaN.
random_seed = None  # Set to an int to make the generated mock file reproducible
rng = np.random.default_rng(random_seed)
date_column_list = list(date_columns)
original_values = df[date_column_list]

mock_frames = []
for new_id in new_building_ids:
    # Independent factor per cell, uniform in [1 - pct, 1 + pct)
    factors = rng.uniform(
        1 - fluctuation_percentage,
        1 + fluctuation_percentage,
        size=original_values.shape,
    )
    mock_frame = df.copy()
    mock_frame['BuildingID'] = new_id
    mock_frame[date_column_list] = original_values * factors
    mock_frames.append(mock_frame)

# 12. All mocked rows together; an empty frame when no new buildings were requested
new_df = pd.concat(mock_frames, ignore_index=True) if mock_frames else df.iloc[0:0]

# 13. Append the new rows to the original DataFrame
combined_df = pd.concat([df, new_df], ignore_index=True)

# 14. Sort the DataFrame by BuildingID and VariableName
combined_df = combined_df.sort_values(by=['BuildingID', 'VariableName'])

# 15. Save to a new CSV
combined_df.to_csv(output_file, index=False)

print(f"Mock data generated and saved to {output_file}")


Mock data generated and saved to D:\Documents\E_Plus_2030_py\output\results\merged_daily_mean_mocked.csv


# Parameters

In [12]:
import pandas as pd
import numpy as np

# Input file: the parameter table to mock. The code below in this cell reads
# its BuildingID, assigned_value, min_value and max_value columns.
input_csv = r"D:\Documents\E_Plus_2030_py\output\assigned\master_parameters.csv" # relative alternative: r'output\assigned\master_parameters.csv'
# Output file for the new, mocked rows
output_csv = r'D:\Documents\E_Plus_2030_py\output\assigned\master_parameters_mock.csv'

# Number of new "copies" generated per unique BuildingID
N = 200

def is_float(val):
    """Return True when ``float(val)`` succeeds, False otherwise.

    Only the errors ``float()`` raises for unusable input are treated as
    "not a float": ``ValueError`` (e.g. ``'abc'`` or ``''``), ``TypeError``
    (e.g. ``None``) and ``OverflowError`` (an int too large for a float).
    Anything else, such as ``KeyboardInterrupt``, propagates to the caller
    instead of being swallowed.

    Note that strings like ``'nan'`` and ``'inf'`` are accepted by ``float()``
    and therefore count as floats here.
    """
    try:
        float(val)
    except (TypeError, ValueError, OverflowError):
        return False
    return True

# Read the original dataset
df = pd.read_csv(input_csv)

# Distance between the ID ranges of two source buildings.
# New IDs are bldg_id * id_stride + i with i in 1..N. With a fixed factor of
# 100 and N > 100 the ranges of neighbouring buildings overlap (building 0,
# copy 150 and building 1, copy 50 would both become 150). Growing the stride
# to the next power of ten >= N keeps the generated IDs distinct from each
# other; for N <= 100 the IDs are exactly what the factor 100 produced.
# NOTE: a generated ID can still equal an ID already present in the input
# (e.g. building 0, copy 1 -> 1); this numbering scheme cannot rule that out.
id_stride = 100
while id_stride < N:
    id_stride *= 10


def _to_float_or_none(text):
    """Return ``float(text)`` when ``is_float`` accepts ``text``, else None."""
    return float(text) if is_float(text) else None


def _mock_assigned_value(assigned_raw, min_raw, max_raw):
    """Produce a mocked replacement for one assigned_value.

    Rules:
    - non-numeric assigned value: returned as its stripped string form;
    - numeric value with numeric min < max: kept with probability 0.6,
      otherwise redrawn uniformly from [min, max);
    - numeric value without such a range: shifted by a random integer in
      [-100, 100] when the value is >= 100, else by a random float in [-1, 1).
    """
    assigned_str = str(assigned_raw).strip()
    assigned_val = _to_float_or_none(assigned_str)
    if assigned_val is None:
        # Keep string values as is
        return assigned_str

    min_val = _to_float_or_none(str(min_raw).strip())
    max_val = _to_float_or_none(str(max_raw).strip())

    if min_val is not None and max_val is not None and min_val < max_val:
        # 60% chance keep same, 40% random in [min_val, max_val]
        if np.random.rand() < 0.6:
            return assigned_val
        return np.random.uniform(min_val, max_val)

    if assigned_val >= 100:
        # vary by ±100 (integer steps; upper bound of randint is exclusive)
        return assigned_val + np.random.randint(-100, 101)

    # vary by ±1
    return assigned_val + np.random.uniform(-1, 1)


# List to accumulate all newly generated rows
mocked_rows = []

# Get the unique BuildingIDs
unique_bldg_ids = df['BuildingID'].unique()

for bldg_id in unique_bldg_ids:
    # Extract rows for this building ID
    subset = df[df['BuildingID'] == bldg_id]

    # Generate N new sets of rows for this building
    for i in range(1, N + 1):
        # Numeric ID scheme (requires a numeric bldg_id).
        # A string scheme such as f"{bldg_id}_{i}" would be the alternative.
        new_bldg_id = bldg_id * id_stride + i

        for _, row in subset.iterrows():
            new_row = row.copy()
            new_row['BuildingID'] = new_bldg_id
            new_row['assigned_value'] = _mock_assigned_value(
                row['assigned_value'], row['min_value'], row['max_value']
            )
            mocked_rows.append(new_row)

# Convert to DataFrame
mocked_df = pd.DataFrame(mocked_rows)

# If you want to include original data + new data, uncomment the next line:
# mocked_df = pd.concat([df, mocked_df], ignore_index=True)

# Write out to CSV
mocked_df.to_csv(output_csv, index=False)

print(f"Generated {N} new sets for each BuildingID. Saved to {output_csv}")


Generated 200 new sets for each BuildingID. Saved to D:\Documents\E_Plus_2030_py\output\assigned\master_parameters_mock.csv


# Mock Data Real

### mimicking

In [11]:
import pandas as pd
import numpy as np
import re
from datetime import datetime

def mimic_merged_daily_mean(
    input_path: str,
    output_path: str,
    rename_dates: bool = True,
    method: str = "scale",    # "scale" or "random"
    lower_bound: float = 0.3, # For "scale": ±30–50%
    upper_bound: float = 0.5,
    seed: int = 42
) -> pd.DataFrame:
    """
    Read a merged-daily-mean CSV, derive a mock table from it and write that
    table to ``output_path``.

    Every column except ``BuildingID`` and ``VariableName`` is coerced to
    numeric (unparseable cells become NaN) and then modified according to
    ``method``. Columns that contain no numeric value at all are left as NaN.

    :param input_path: Path of the CSV to read. It must contain a
                       ``VariableName`` column, which is whitespace-stripped.
    :param output_path: Where the new CSV is written (without the index).
    :param rename_dates: When True, columns named 'DD-Mmm' (e.g. '01-Jan')
                         are renamed to 'MM/DD' (e.g. '01/01'). Labels that
                         are not a real calendar date are left untouched.
    :param method: "scale" multiplies each cell by ``1 ± p`` with a random
                   sign and ``p`` drawn uniformly from
                   [lower_bound, upper_bound); NaN cells stay NaN.
                   "random" replaces the whole column, including NaN cells,
                   with uniform draws from the column's original min–max range.
    :param lower_bound: Smallest relative change for "scale" (0.3 -> 30%).
    :param upper_bound: Largest relative change for "scale" (0.5 -> 50%).
    :param seed: Seed passed to ``np.random.seed`` so a run can be repeated.
    :return: The new pandas DataFrame.
    :raises ValueError: If ``method`` is neither "scale" nor "random".
    """
    # Reject a bad method before touching any file, rather than only after the
    # first numeric column has been reached.
    if method not in ("scale", "random"):
        raise ValueError("Unknown method. Choose 'scale' or 'random'.")

    np.random.seed(seed)

    # 1. Read the original CSV
    df_original = pd.read_csv(input_path)

    # Remove leading/trailing spaces from VariableName to avoid mismatches
    df_original["VariableName"] = df_original["VariableName"].astype(str).str.strip()

    # 2. Work on a copy so the frame that was read stays untouched
    df_new = df_original.copy()

    # 2A. Optionally rename date-like columns from 'DD-Mon' -> 'MM/DD'
    if rename_dates:
        renamed_cols = {}
        for col in df_new.columns:
            match = re.match(r"^(\d{2})-(\w{3})$", col)
            if not match:
                continue
            day_str, month_str = match.groups()
            try:
                # Parse against a leap year so that '29-Feb' is a valid date;
                # with a non-leap year that column would never be renamed.
                dt = datetime.strptime(f"{day_str}-{month_str}-2024", "%d-%b-%Y")
            except ValueError:
                # Looks like a date but is not one (e.g. '31-Feb'): keep the name
                continue
            renamed_cols[col] = dt.strftime("%m/%d")  # e.g. 01-Jan -> 01/01

        df_new = df_new.rename(columns=renamed_cols)

    # 3. Mimic numeric data
    skip_cols = ("BuildingID", "VariableName")
    cols_to_modify = [c for c in df_new.columns if c not in skip_cols]
    n_rows = len(df_new)

    for col in cols_to_modify:
        # Convert to numeric (coercing errors to NaN)
        df_new[col] = pd.to_numeric(df_new[col], errors="coerce")

        # Nothing numeric in this column: leave it alone
        if df_new[col].notna().sum() == 0:
            continue

        if method == "scale":
            # Direction (+1 / -1) and magnitude are drawn independently per row
            scale_direction = np.random.choice([-1, 1], size=n_rows)
            scale_pct = np.random.uniform(lower_bound, upper_bound, size=n_rows)
            df_new[col] = df_new[col] * (1 + scale_direction * scale_pct)
        else:  # method == "random"
            old_min, old_max = df_new[col].min(), df_new[col].max()
            if old_min == old_max:
                # Constant column: widen the range so the draws can vary
                old_min -= 1.0
                old_max += 1.0
            df_new[col] = np.random.uniform(old_min, old_max, size=n_rows)

    # 4. Save the result
    df_new.to_csv(output_path, index=False)
    return df_new


if __name__ == "__main__":
    # Demo run: read the real merged table and write a scaled mock next to it.
    input_file = r"D:\Documents\E_Plus_2030_py\output\results\merged_daily_mean.csv"
    output_file = r"D:\Documents\E_Plus_2030_py\output\results\mock_merged_daily_mean.csv"

    # Source and destination go in positionally; the tuning knobs stay named.
    df_mocked = mimic_merged_daily_mean(
        input_file,
        output_file,
        rename_dates=True,   # '01-Jan' style headers become '01/01'
        method="scale",      # the alternative is "random"
        lower_bound=0.3,
        upper_bound=0.5,
        seed=42,
    )

    # Report where the file went and show the first rows as a sanity check
    print("Saved new, mimicked data to:", output_file)
    print(df_mocked.head(10))


Saved new, mimicked data to: D:\Documents\E_Plus_2030_py\output\results\mock_merged_daily_mean.csv
   BuildingID                                       VariableName  \
0           0                 Cooling:EnergyTransfer [J](Hourly)   
1           0                   Electricity:Facility [J](Hourly)   
2           0                 Heating:EnergyTransfer [J](Hourly)   
3           0  MYDHW_0_WATERHEATER:Water Heater Heating Energ...   

          01/01         01/02         01/03         01/04         01/05  \
0  0.000000e+00  0.000000e+00  0.000000e+00  0.000000e+00  0.000000e+00   
1  4.637891e+06  1.826048e+06  4.519129e+06  2.189475e+06  4.587645e+06   
2  5.356259e+06  8.549588e+06  8.411575e+06  4.913425e+06  5.444363e+06   
3  2.280979e+07  1.698950e+07  4.727667e+07  2.095039e+07  4.712216e+07   

          01/06         01/07         01/08  ...         12/22         12/23  \
0  0.000000e+00  0.000000e+00  0.000000e+00  ...  0.000000e+00  0.000000e+00   
1  1.695626e+06  4.60470