# Merge home and boiler data


In [None]:
import pandas as pd
import numpy as np
from tqdm.notebook import tqdm

# ---------------------------------------------------------------------------
# Input files
# ---------------------------------------------------------------------------
home_weather_locations_file_path = "home_weather_locations.parquet"
home_boilers_excel_file_path = "homes_boilers.xlsx"
boilers_excel_file_path = "boilers.xlsx"
# NOTE: the variable name is misspelled ("poperties"); it is kept unchanged
# because the cell that reads this file refers to it under exactly this name.
rhc_preprocessed_poperties_file = "rhc_preprocessed_properties.parquet"

# Further input files that are not bound to a variable in this cell
# (presumably the per-boiler efficiency tables listed in boilers.xlsx -- confirm):
#   avanta_ace_28c_Hs.csv
#   calenta_ace_28c_Hs.csv
#   calenta_ace_40l_Hs.csv
#   tzerra_ace_39c_Hs.csv
#   avanta_ace_35c_Hs.csv
#   calenta_ace_40c_Hs.csv
#   tzerra_ace_28c_Hs.csv
#   tzerra_ace_matic_35c_Hs.csv

# ---------------------------------------------------------------------------
# Output files
# ---------------------------------------------------------------------------
home_data_file_path = "home_data.parquet"
boiler_returntemp_load_efficiency_file_path = "boiler_returntemp_load_efficiency.parquet"
boiler_returntemp_efficiency_file_path = "boiler_returntemp_efficiency.parquet"


## Read homes with enough data from a Parquet file

In [None]:
%%time

# Read the preprocessed properties from the Parquet file into df_prep.
# Columns are loaded with pandas' nullable ("numpy_nullable") dtypes.
try:
    df_prep = pd.read_parquet(
        rhc_preprocessed_poperties_file,
        engine='pyarrow',
        dtype_backend='numpy_nullable',
    )
except Exception as e:
    # Report the problem, then re-raise: the following cells need df_prep, and
    # swallowing the error here would only surface later as a confusing
    # NameError (or silently reuse a stale df_prep from an earlier run).
    print(f"Error reading file: {e}")
    raise
else:
    # Only reached when the read succeeded
    print("File was successfully read without specifying compression codec.")

In [None]:
# Distinct values of the 'id' index level of df_prep, without missing entries,
# as a plain Python list.
home_ids = df_prep.index.unique(level='id')
home_ids = list(home_ids.dropna())

In [None]:
# Show the selected home ids as cell output for a quick visual check
home_ids

## Get home weather locations

In [None]:
%%time
# Read the home weather locations from the Parquet file into df_homes_weather.
# Columns are loaded with pandas' nullable ("numpy_nullable") dtypes.
try:
    df_homes_weather = pd.read_parquet(
        home_weather_locations_file_path,
        engine='pyarrow',
        dtype_backend='numpy_nullable',
    )
except Exception as e:
    # Report the problem, then re-raise: the following cells need
    # df_homes_weather, and swallowing the error here would only surface later
    # as a confusing NameError (or silently reuse data from an earlier run).
    print(f"Error reading file: {e}")
    raise
else:
    # Only reached when the read succeeded
    print("File was successfully read without specifying compression codec.")


In [None]:
# Keep only the rows whose index label is one of the selected home ids ...
df_homes_weather = df_homes_weather.loc[home_ids]
# ... and move the index back into regular column(s); the merge further down
# joins on the 'id' column.
df_homes_weather = df_homes_weather.reset_index()

In [None]:
# Show the weather locations of the selected homes as cell output
df_homes_weather

## Get boiler related data

In [None]:
# Load the home/boiler sheet from Excel.
df_homes_boilers = pd.read_excel(home_boilers_excel_file_path)
# The 'pseudonym' column is renamed to 'id' so it can serve as the merge key
# with the other home data.
df_homes_boilers = df_homes_boilers.rename({'pseudonym': 'id'}, axis='columns')


In [None]:
# Show the home/boiler table as cell output
df_homes_boilers

In [None]:
# Load the boiler sheet from Excel and keep only the columns used below.
df_boilers = pd.read_excel(boilers_excel_file_path).loc[
    :,
    [
        'brand_model',
        'parHePowerMin__0.1kW',
        'parHePowerMax__0.1kW',
        'parHeFanRpmMin.Minimum ',  # the trailing space is part of the column name!
        'parHeFanRpmChMax.Maximum',
        'efficiency_table_temp_ret_degC_eta_sup_dhw__0',
    ],
]

In [None]:
# Convert the minimum power from units of 0.1 kW to kW (new column appended at
# the end) and drop the raw column.
df_boilers = (
    df_boilers
    .assign(power_ch_min__kW=df_boilers['parHePowerMin__0.1kW'] * 0.1)
    .drop(columns=['parHePowerMin__0.1kW'])
)

In [None]:
# Convert the maximum power from units of 0.1 kW to kW (new column appended at
# the end) and drop the raw column.
df_boilers = (
    df_boilers
    .assign(power_ch_max__kW=df_boilers['parHePowerMax__0.1kW'] * 0.1)
    .drop(columns=['parHePowerMax__0.1kW'])
)

In [None]:
# Give the remaining raw boiler columns their working names.
df_boilers = df_boilers.rename(
    {
        'parHeFanRpmMin.Minimum ': 'min_fan_rotations__min_1',  # source name ends with a space
        'parHeFanRpmChMax.Maximum': 'max_ch_fan_rotations__min_1',
        'efficiency_table_temp_ret_degC_eta_sup_dhw__0': 'table_eta_ch_hhv__W0',
    },
    axis='columns',
)

In [None]:
# Show the boiler table (converted and renamed columns) as cell output
df_boilers

## Merge other home data

In [None]:
# Left join on 'id': every home from the weather table is kept, with the
# home/boiler columns added where a matching 'id' exists.
df_homes = df_homes_weather.merge(df_homes_boilers, how='left', on='id')

In [None]:
# Show the homes table after adding the home/boiler columns
df_homes

In [None]:
# Left join on 'brand_model': every home is kept, with the boiler properties
# added where the boiler table has a matching 'brand_model'.
df_homes = df_homes.merge(df_boilers, how='left', on='brand_model')

In [None]:
# Use the home 'id' as the index of the merged table
df_homes = df_homes.set_index('id')

In [None]:
# Show the merged homes table, now indexed by 'id'
df_homes

## Write merged DataFrame to file

In [None]:
%%time 
# Persist the merged homes table, including its 'id' index, as Parquet
df_homes.to_parquet(path=home_data_file_path, engine='pyarrow', index=True)

# Process boiler efficiency data

In [None]:
# Initialise the combined efficiency table as an empty DataFrame; it is filled
# per boiler model by the loop that follows.
boiler_returntemp_load_eta_ch_hhv__W0 = pd.DataFrame()

In [None]:
# Build one long-format efficiency table covering all boiler models in df_boilers.
#
# The per-boiler frames are collected in a list and concatenated once after the
# loop. This avoids re-copying the growing table on every iteration and makes
# the cell idempotent: re-running it rebuilds the table instead of appending
# the same rows a second time.
eta_frames = []
for boiler_model, eta_file in zip(df_boilers['brand_model'],
                                  df_boilers['table_eta_ch_hhv__W0']):

    # Read the efficiency table; the first CSV column becomes the row index
    eta_ch_hhv__W0 = pd.read_csv(eta_file, index_col=0)

    # Convert percentage efficiencies to fractions
    eta_ch_hhv__W0 = eta_ch_hhv__W0 / 100.0

    # Reshape from wide to long: one row per (row label, column label) pair
    eta_ch_hhv__W0 = eta_ch_hhv__W0.stack().to_frame(name='eta_ch_hhv__W0')

    # Prefix the index with the boiler model and convert both labels to plain
    # Python floats (i.e. float64, not float32). The CSV row labels end up in
    # the 'p_load__kW0' level and the CSV column labels in 'temp_ret__degC'.
    eta_ch_hhv__W0.index = pd.MultiIndex.from_tuples(
        [(boiler_model, float(load), float(temp)) for load, temp in eta_ch_hhv__W0.index],
        names=['boiler_model', 'p_load__kW0', 'temp_ret__degC'],
    )

    eta_frames.append(eta_ch_hhv__W0)

# pd.concat raises on an empty list, so fall back to an empty DataFrame when
# df_boilers has no rows.
boiler_returntemp_load_eta_ch_hhv__W0 = (
    pd.concat(eta_frames, axis=0) if eta_frames else pd.DataFrame()
)


In [None]:
# Print a summary (index, columns, dtypes, non-null counts, memory usage) of the combined table
boiler_returntemp_load_eta_ch_hhv__W0.info()

In [None]:
# Show the names of the index levels of the combined table
boiler_returntemp_load_eta_ch_hhv__W0.index.names

In [None]:
# Show the combined efficiency table as cell output
boiler_returntemp_load_eta_ch_hhv__W0

In [None]:
# Keep only the rows that have an efficiency value, then turn the index levels
# into regular columns so they can be aggregated below.
df_filtered = (
    boiler_returntemp_load_eta_ch_hhv__W0
    .dropna(subset=['eta_ch_hhv__W0'])
    .reset_index()
)

In [None]:
# Per boiler model: the smallest and largest return temperature and load for
# which an efficiency value is available (shown as cell output).
(
    df_filtered
    .groupby('boiler_model')[['temp_ret__degC', 'p_load__kW0']]
    .agg(['min', 'max'])
    .reset_index()
)

## Write boiler efficiency data per boiler model, return temperature and load

In [None]:
%%time 
# Persist the efficiency table per boiler model, load and return temperature,
# including its index, as Parquet
boiler_returntemp_load_eta_ch_hhv__W0.to_parquet(
    path=boiler_returntemp_load_efficiency_file_path,
    engine='pyarrow',
    index=True,
)

In [None]:
# Column-wise mean over all rows of the combined table, i.e. the overall mean
# efficiency across all boiler models, loads and return temperatures
boiler_returntemp_load_eta_ch_hhv__W0.mean()

# Calculate mean efficiency across loads

In [None]:
# Average the efficiency over all loads: one mean value per combination of the
# 'boiler_model' and 'temp_ret__degC' index levels.
boiler_returntemp_eta_ch_hhv__W0 = (
    boiler_returntemp_load_eta_ch_hhv__W0
    .groupby(level=['boiler_model', 'temp_ret__degC'])
    .mean()
)


## Write boiler efficiency data per boiler model and return temperature (mean across loads)

In [None]:
%%time 
# Persist the load-averaged efficiency table, including its index, as Parquet
boiler_returntemp_eta_ch_hhv__W0.to_parquet(
    path=boiler_returntemp_efficiency_file_path,
    engine='pyarrow',
    index=True,
)

In [None]:
# Column-wise mean over all rows of the load-averaged table, i.e. the mean of
# the per-(boiler model, return temperature) mean efficiencies
boiler_returntemp_eta_ch_hhv__W0.mean()