In [None]:
import os
import numpy as np
import pandas as pd
from datetime import datetime

# Analysis parameters
granularity = 2  # resampling step, in minutes
threshold_power = 100  # power level above which a pulse counts as "on", in Watts

# Folder locations -- replace the `...` placeholders before running
path_LT = ...  # source folder holding the LT HP household files
path_LT_Pulses_Flag = ...  # destination folder where the results are saved

# Load the modified all_summary.csv. The path is relative, so the file is
# read from the current working directory; adjust it if the file lives elsewhere.
all_summary = pd.read_csv('all_summary.csv')

# Keep only the properties flagged for the SPF analysis, then split them by
# the type of heat pump installed.
all_summary = all_summary.loc[all_summary['Included_SPF_analysis'].eq(True)]
all_summary_LT_ASHP = all_summary.loc[all_summary['HP_Installed'].eq("ASHP")]
all_summary_HT_ASHP = all_summary.loc[all_summary['HP_Installed'].eq("HT_ASHP")]

# Process each household: trim the record to the span with non-zero energy
# consumption, resample it onto a regular time grid, derive power series and
# pulse flags, then save one parquet file per property.

# Create the destination folder up front so to_parquet does not fail on a
# missing directory.
os.makedirs(path_LT_Pulses_Flag, exist_ok=True)

# Energy columns that are zero-filled when a household file lacks them.
optional_energy_columns = (
    'Immersion_Heater_Energy_Consumed',
    'Circulation_Pump_Energy_Consumed',
)

# Energy column -> name of the power column derived from its per-step difference.
# Insertion order fixes the order in which the output columns are created.
power_columns = {
    'Circulation_Pump_Energy_Consumed': 'Circulation_Pump_Power',
    'Immersion_Heater_Energy_Consumed': 'Immersion_Heater_Power',
    'Heat_Pump_Energy_Consumed': 'Heat_Pump_Power',
    'Whole_System_Energy_Consumed': 'Whole_System_Power',
    'Heat_Pump_Energy_Output': 'Heat_Pump_Power_Output',
}

for property_id in all_summary_LT_ASHP['Property_ID']:
    file_path = os.path.join(path_LT, f"{property_id}.parquet")

    if not os.path.exists(file_path):
        print(f"File not found: {file_path}")
        continue

    # Load household data
    household = pd.read_parquet(file_path)

    # Convert to datetime
    household['Timestamp'] = pd.to_datetime(household['Timestamp'])

    # Locate the rows with non-zero energy consumption by *position*, so the
    # trimming below is correct whatever index the parquet file carries.
    has_consumption = household['Whole_System_Energy_Consumed'] > 0
    active_positions = np.flatnonzero(
        has_consumption.to_numpy(dtype=bool, na_value=False)
    )
    if active_positions.size == 0:
        print(f"No non-zero energy consumption, skipped: {file_path}")
        continue

    # Keep everything from the first to the last non-zero reading (inclusive)
    household = household.iloc[active_positions[0]:active_positions[-1] + 1]

    # Resample onto a regular grid. 'min' replaces the deprecated 'T' alias.
    # NOTE(review): resample(...).interpolate() reindexes to the grid before
    # interpolating, so readings that fall between grid points are not used --
    # confirm the raw timestamps are aligned to the grid.
    household = household.set_index('Timestamp')
    household = household.resample(f'{granularity}min').interpolate()

    # Add missing columns if necessary
    for column in optional_energy_columns:
        if column not in household:
            household[column] = 0

    # Heat pump energy = whole system minus circulation pump and immersion heater
    household['Heat_Pump_Energy_Consumed'] = (
        household['Whole_System_Energy_Consumed'] -
        household['Circulation_Pump_Energy_Consumed'] -
        household['Immersion_Heater_Energy_Consumed']
    )

    # Per-step energy differences (the first row has no predecessor -> 0)
    for energy_column in power_columns:
        household[f"{energy_column}_Diff"] = household[energy_column].diff().fillna(0)

    # Convert per-step energy into power in Watts. Every series, including the
    # circulation pump, is derived from its *_Diff column with the same factor.
    # NOTE(review): the 60 * 1000 / granularity factor presumably assumes
    # energy in kWh and a step of `granularity` minutes -- confirm the units.
    for energy_column, power_column in power_columns.items():
        household[power_column] = (
            household[f"{energy_column}_Diff"] * 60 * 1000 / granularity
        )

    # Remove negative power values
    household['Heat_Pump_Power'] = household['Heat_Pump_Power'].clip(lower=0)

    # Identify heating pulses: flag the steps above the threshold and keep the
    # power only where the flag is set (zero elsewhere).
    household['Heating_Pulse_On'] = household['Heat_Pump_Power'] > threshold_power
    household['Immersion_On'] = household['Immersion_Heater_Power'] > threshold_power
    household['Heating_Pulses'] = household['Heating_Pulse_On'] * household['Heat_Pump_Power']
    household['Immersion_Pulses'] = household['Immersion_On'] * household['Immersion_Heater_Power']

    # Save to .parquet file
    output_path = os.path.join(path_LT_Pulses_Flag, f"{property_id}.parquet")
    household.to_parquet(output_path, index=True)

    print(f"Processed and saved: {output_path}")