# merged daily mean

In [6]:
import pandas as pd
import numpy as np

# Parameters
input_file = 'output/results/merged_daily_mean.csv'  # Path to your input CSV
output_file = 'output/results/merged_daily_mean_mocked.csv'  # Path for the output CSV
num_new_buildings = 5  # Number of new BuildingIDs to generate
fluctuation_percentage = 0.2  # ±20%
random_seed = None  # Set to an int for reproducible mock data; None leaves NumPy's global RNG untouched

# Read the existing CSV
df = pd.read_csv(input_file)

# Fail early with a readable message; otherwise the max() below raises an
# opaque "zero-size array" ValueError on an empty file.
if df.empty:
    raise ValueError(f"{input_file} contains no data rows; nothing to mock.")

# Ensure 'BuildingID' is numeric: errors='coerce' turns non-convertible values
# into NaN, which the check below reports as an error.
df['BuildingID'] = pd.to_numeric(df['BuildingID'], errors='coerce')
if df['BuildingID'].isnull().any():
    raise ValueError("Some BuildingID values could not be converted to numbers. Please check your data.")
df['BuildingID'] = df['BuildingID'].astype(int)

# New BuildingIDs continue directly after the largest existing one.
existing_buildings = df['BuildingID'].unique()
max_building_id = existing_buildings.max()
new_building_ids = range(max_building_id + 1, max_building_id + 1 + num_new_buildings)

# Identify date columns (assuming they start from the third column)
date_columns = df.columns[2:]

if random_seed is not None:
    np.random.seed(random_seed)

# Bounds of the multiplicative fluctuation factor, e.g. [0.8, 1.2] for ±20%.
factor_low = 1 - fluctuation_percentage
factor_high = 1 + fluctuation_percentage

# All date values as one float matrix so each new building needs a single
# vectorised multiply instead of a Python loop over every cell.
original_values = df[date_columns].to_numpy(dtype=float, na_value=np.nan)

# One mocked copy of the whole table per new BuildingID.
new_frames = []
for new_id in new_building_ids:
    mock = df.copy()
    mock['BuildingID'] = new_id
    if len(date_columns) > 0:
        # An independent factor is drawn for every cell; NaN * factor stays
        # NaN, so missing values are preserved without a special case.
        factors = np.random.uniform(factor_low, factor_high, size=original_values.shape)
        mock[date_columns] = original_values * factors
    new_frames.append(mock)

# With num_new_buildings == 0 there is nothing to concatenate; fall back to an
# empty frame that still carries the original columns.
new_df = pd.concat(new_frames) if new_frames else df.iloc[0:0].copy()

# Append the new rows to the original data and order by building, then variable.
combined_df = pd.concat([df, new_df], ignore_index=True)
combined_df = combined_df.sort_values(by=['BuildingID', 'VariableName'])

# Save to a new CSV
combined_df.to_csv(output_file, index=False)

print(f"Mock data generated and saved to {output_file}")


Mock data generated and saved to output/results/merged_daily_mean_mocked.csv


# parameters

In [10]:
import pandas as pd
import numpy as np

# Input file.
# Forward slashes are used because they work on Windows, Linux and macOS alike;
# a backslash-separated path would be read as one literal file name outside Windows.
input_csv = 'output/assigned/master_parameters.csv'
# Output file for the new, mocked rows
output_csv = 'output/assigned/master_parameters_mock.csv'

# Number of new “copies” per unique BuildingID
N = 10

def is_float(val):
    """Return True if ``val`` can be converted with ``float()``, else False.

    Only the conversion failures ``float()`` itself raises (``TypeError``,
    ``ValueError``, ``OverflowError``) are treated as "not a float".
    Anything else, such as ``KeyboardInterrupt``, propagates to the caller
    instead of being silently turned into ``False``.

    Note that strings such as ``'nan'`` and ``'inf'`` are accepted by
    ``float()`` and therefore count as floats here.
    """
    try:
        float(val)
    except (TypeError, ValueError, OverflowError):
        return False
    return True

random_seed = None  # Set to an int for reproducible mock data; None leaves NumPy's global RNG untouched


def _mock_assigned_value(assigned, min_value, max_value):
    """Return a mocked replacement for one row's ``assigned_value``.

    All three inputs are compared via their stripped string form, so numbers
    stored as text are recognised as numeric.

    Rules:
      * Non-numeric ``assigned`` -> returned as its stripped string, unchanged.
      * Numeric ``assigned`` with numeric ``min_value < max_value`` ->
        60% chance the value is kept (as a float), 40% chance it is replaced
        by a uniform draw from ``[min_value, max_value]``.
      * Numeric ``assigned`` without such a range -> shifted by a random
        integer in [-100, 100] when the value is >= 100, otherwise by a
        uniform offset in [-1, 1).
    """
    assigned_str = str(assigned).strip()
    if not is_float(assigned_str):
        # Keep non-numeric values (e.g. names, flags) as their text.
        return assigned_str
    assigned_val = float(assigned_str)

    min_str = str(min_value).strip()
    max_str = str(max_value).strip()
    min_val = float(min_str) if is_float(min_str) else None
    max_val = float(max_str) if is_float(max_str) else None

    # A missing bound parses as NaN; NaN comparisons are False, so such rows
    # fall through to the "no valid range" rules below.
    if min_val is not None and max_val is not None and min_val < max_val:
        if np.random.rand() < 0.6:
            return assigned_val
        return np.random.uniform(min_val, max_val)

    if assigned_val >= 100:
        # vary by ±100
        return assigned_val + np.random.randint(-100, 101)
    # vary by ±1
    return assigned_val + np.random.uniform(-1, 1)


# Read the original dataset
df = pd.read_csv(input_csv)

if random_seed is not None:
    np.random.seed(random_seed)

# One mocked copy of a building's rows per (building, copy index) pair.
mocked_frames = []
# Generated IDs seen so far, used to detect collisions of the ID scheme below.
seen_new_ids = set()

# sort=False keeps buildings in order of first appearance; rows whose
# BuildingID is missing are skipped (groupby drops NaN keys by default).
for bldg_id, subset in df.groupby('BuildingID', sort=False):
    # Generate N new sets of rows for this building
    for i in range(1, N + 1):
        # New ID scheme: bldg_id*100 + i (requires a numeric BuildingID).
        new_bldg_id = bldg_id * 100 + i
        if new_bldg_id in seen_new_ids:
            # E.g. building 5 copy 101 and building 6 copy 1 both give 601.
            raise ValueError(
                f"Generated BuildingID {new_bldg_id} is not unique; "
                f"N={N} is too large for the bldg_id*100 + i scheme."
            )
        seen_new_ids.add(new_bldg_id)

        mock = subset.copy()
        mock['BuildingID'] = new_bldg_id
        # Rows are processed in their original order, so the sequence of
        # random draws matches a row-by-row loop.
        mock['assigned_value'] = [
            _mock_assigned_value(assigned, lower, upper)
            for assigned, lower, upper in zip(
                subset['assigned_value'], subset['min_value'], subset['max_value']
            )
        ]
        mocked_frames.append(mock)

# Combine everything; with no input rows (or N == 0) fall back to an empty
# frame that still carries the original columns so the CSV keeps its header.
mocked_df = pd.concat(mocked_frames) if mocked_frames else df.iloc[0:0].copy()

# If you want to include original data + new data, uncomment the next line:
# mocked_df = pd.concat([df, mocked_df], ignore_index=True)

# Write out to CSV
mocked_df.to_csv(output_csv, index=False)

print(f"Generated {N} new sets for each BuildingID. Saved to {output_csv}")


Generated 10 new sets for each BuildingID. Saved to output\assigned\master_parameters_mock.csv
