# REDUCEDHEATCARB analysis

This JupyterLab notebook can be used for physics-informed machine learning analysis in the REDUCEDHEATCARB project.
Don't forget to install the requirements listed in [requirements.txt](../requirements.txt) first!

## Setting the stage

First, several imports and variables need to be defined.


### Imports and generic settings

In [None]:
import numpy as np
import pandas as pd

%load_ext autoreload

    
from tqdm.notebook import tqdm

from gekko import GEKKO

import sys
sys.path.append('../data/')
sys.path.append('../view/')
sys.path.append('../analysis/')

from plotter import Plot
import matplotlib.pyplot as plt
%matplotlib inline
%matplotlib widget


from nfh_utils import *

# usually, two decimals suffice for displaying DataFrames (NB internally, precision may be higher)
pd.options.display.precision = 2

from rhc_analysis import Learner

# Input file with preprocessed, interpolated properties; enable exactly one of the alternatives below.
# NOTE(review): 'poperties' in these variable names looks like a typo for 'properties';
# the names are kept as-is because later cells reference them.
# rhc_preprocessed_poperties_file='rhc_preprocessed_properties_intv_5_min.parquet'
rhc_preprocessed_poperties_file='rhc_preprocessed_properties_intv_1_min.parquet'
#rhc_preprocessed_poperties_file='rhc_heat_dist_preprocessed_properties.parquet'

# Input files: home metadata and boiler efficiency tables (read into DataFrames further down)
home_data_file_path = "home_data.parquet"
boiler_returntemp_load_efficiency_file_path = "boiler_returntemp_load_efficiency.parquet"
boiler_returntemp_efficiency_file_path = "boiler_returntemp_efficiency.parquet"

# File for intermediate output (including preprocessing that may need to migrate to the GEKKO model code for the what-if simulations)
rhc_heat_dist_preprocessed_poperties_file='rhc_heat_dist_preprocessed_properties.parquet'


### Reading preprocessed interpolated properties from a parquet file

In [None]:
%%time

# Attempt to read the Parquet file
try:
    df_prep = pd.read_parquet(
        rhc_preprocessed_poperties_file, 
        engine='pyarrow',
        dtype_backend='numpy_nullable'
        )
    print("File was successfully read without specifying compression codec.")
except Exception as e:
    print(f"Error reading file: {e}")

In [None]:
# Sorting the DataFrame index is needed to get good performance on certain filters.
# Guard: sort only when the index is not already monotonically increasing.
if not df_prep.index.is_monotonic_increasing:
    print('df needed index sorting')
    df_prep = df_prep.sort_index()  

In [None]:
# Total number of non-null values over all columns, printed with '_' as thousands separator
print(f"df_prep.count().sum(): {df_prep.count().sum():_}")

In [None]:
# Concise summary of df_prep (index, columns, dtypes, memory usage)
df_prep.info()

In [None]:
# Print the alphabetically sorted column names as a quoted, bracketed list (one name per line)
print("[\n'","', \n'".join(sorted(df_prep.columns)),"'\n]")

In [None]:
%autoreload 2
# Display labels in matplotlib mathtext, keyed by unit string; passed as units_to_mathtext
# to Plot.dataframe_preprocessed_plot() further down.
# Presumably the keys match the unit suffix after '__' in column names — TODO confirm in plotter.
# The same dict object is also bound to the name property_types.
units_to_mathtext = property_types = {
    'degC' : r'$°C$',
    'ppm' : r'$ppm$',
    '0' : r'$[-]$',
    'bool': r'$0 = False; 1 = True$',
    'p' : r'$persons$',
    'W' : r'$W$',
    'W_m_2' : r'$W/m^{2}$',
    'm_s_1' : r'$m/s$'    
}

In [None]:
# visualize all input data (df_plot is bound to the same object as df_prep; no copy is made)
df_plot = df_prep

In [None]:
# Unique non-null values of the 'id' index level, as a Python list
list(df_plot.index.unique('id').dropna())

In [None]:
# Unique non-null values of the 'id' index level, shown as a pandas Index
df_plot.index.unique('id').dropna()

In [None]:
# df_plot.loc[[401632]][[prop for prop in df_plot.columns.values if prop.split('__')[-1] in ('degC', 'W', '0', 'bool', 'ppm', 'W_m_2')]]

In [None]:
#Plot all properties from all sources for all ids
#Plot.dataframe_preprocessed_plot(df_plot.loc[[401632]][[prop for prop in df_plot.columns.values if prop.split('__')[-1] in ('degC', 'ppm', 'W_m_2')]], units_to_mathtext)

# Calculate additional properties 
We may have to move some of these calculations to inside the GEKKO Python model code (e.g. for the what-if scenario simulation)

### Calculating electricity data

In [None]:
%%time
df_prep['calculated_device_p1-reader_e_use__W'] = df_prep['device_p1-reader_e_use_hi__W'] + df_prep['device_p1-reader_e_use_lo__W']
df_prep['calculated_device_p1-reader_e_ret__W'] = df_prep['device_p1-reader_e_ret_hi__W'] + df_prep['device_p1-reader_e_ret_lo__W'] 


### Reading and calculating boiler data 

#### Reading home metadata

In [None]:
%%time
# Attempt to read the Parquet file
try:
    df_homes = pd.read_parquet(
        home_data_file_path, 
        engine='pyarrow',
        dtype_backend='numpy_nullable'
        )
    print("File was successfully read without specifying compression codec.")
except Exception as e:
    print(f"Error reading file: {e}")

#### Merge home data into df_prep

In [None]:
# Merge df_prep with df_homes to get the brand_model (and the other df_homes columns) for each id.
# merge() defaults to an inner join, so rows whose id does not occur in df_homes are dropped.
# NOTE(review): this cell is not idempotent; re-running it merges the df_homes columns a second time
# (pandas then adds _x/_y suffixes), so run it once per freshly loaded df_prep.
df_prep = df_prep.reset_index().merge(df_homes, on='id').set_index(['id', 'timestamp'])

## Calculate actual gas input power

### Calculate actual gas input power for central heating based on boiler fan speed

In [None]:
%%time                                         
# Create value for boiler load fraction (how far along the current fan RPM is between the min and max)
# NOTE(review): 'fan_min_ch_rotations_min_1' has a single underscore before 'min_1', whereas
# 'fan_max_ch_rotations__min_1' has a double underscore; verify both names against the home metadata file.
df_prep.loc[:,'calculated_fan_frac__0'] = (
    (df_prep['batch_import_remeha_fan_rotations__min_1'] - df_prep['fan_min_ch_rotations_min_1'])
    / 
    (df_prep['fan_max_ch_rotations__min_1'] - df_prep['fan_min_ch_rotations_min_1'])
).astype('Float32')

# Calculate input power of G25.3 gas as a linear interpolation between Qnh_min_lhv__kW and Qnh_max_lhv__kW based on calculated_fan_frac__0
# (the factor 1e3 converts kW to W)
df_prep.loc[:,'calculated_g25_3_use_boiler_lhv__W'] = (
    (df_prep['Qnh_max_lhv__kW'] * df_prep['calculated_fan_frac__0'] 
    +
    df_prep['Qnh_min_lhv__kW'] * (1- df_prep['calculated_fan_frac__0'])
    )
    * 1e3
    * df_prep['batch_import_remeha_gas_valve_open__bool'] # make sure gas use = 0 when gas valve is closed
)

# use the actual higher heating value
df_prep.loc[:,'gas_std_hhv__J_m_3'] = df_prep['batch_import_EDSN_actual_gas_std_hhv__J_m_3']

# alternatively, use average of actual higher heating value
# df_prep.loc[:,'gas_std_hhv__J_m_3'] = df_prep[df_prep['batch_import_remeha_boiler_status_burning_ch__bool'] == True]['batch_import_EDSN_actual_gas_std_hhv__J_m_3'].mean()

# alternative, simpler solution: assume higher heating value of groningen gas
# df_prep.loc[:,'gas_std_hhv__J_m_3'] = gas_groningen_nl_avg_std_hhv__J_m_3

# Calorific value conversion factor from G25.3 gas to the actual gas used 
df_prep.loc[:,'gas_calorific_factor_g25_3_lhv_to_actual_hhv__J0'] = df_prep['gas_std_hhv__J_m_3'] / gas_g25_3_ref_lhv__J_m_3

# Pressure conversion factor from reference pressure (1 atm) to actual pressure (KNMI), including constant overpressure
df_prep.loc[:,'gas_pressure_factor_ref_to_actual__J0'] = (
    (df_prep['batch_import_KNMI_air__Pa'] + overpressure_gas_nl_avg__Pa) 
    / 
    (P_std__Pa + overpressure_gas_nl_avg__Pa)
)

# Temperature conversion factor from reference temperature to actual temperature (of which we only know an average value based on ACM)
# This is a single scalar, so every row gets the same value.
df_prep.loc[:,'gas_temp_factor_ref_to_actual__J0'] = temp_gas_ref__K / temp_gas_avg_nl__K

In [None]:
%%time                                         
# estimate gas input power at actual pressure and temperature 
df_prep.loc[:,'calculated_g_use_boiler_hhv__W'] = (
    df_prep['calculated_g25_3_use_boiler_lhv__W']
    *
    df_prep['gas_calorific_factor_g25_3_lhv_to_actual_hhv__J0']
    *
    df_prep['gas_pressure_factor_ref_to_actual__J0'] 
    *
    df_prep['gas_temp_factor_ref_to_actual__J0']
)

In [None]:
%%time                                         
# estimate gas input power at actual pressure and temperature 
df_prep.loc[:,'calculated_remeha_g_use_dhw_hhv__W'] = (
    df_prep['batch_import_remeha_g_use_dhw_lhv__W']
    *
    df_prep['gas_calorific_factor_g25_3_lhv_to_actual_hhv__J0']
    *
    df_prep['gas_pressure_factor_ref_to_actual__J0'] 
    *
    df_prep['gas_temp_factor_ref_to_actual__J0']
)

df_prep.loc[:,'calculated_remeha_g_use_ch_hhv__W'] = (
    df_prep['batch_import_remeha_g_use_ch_lhv__W']
    *
    df_prep['gas_calorific_factor_g25_3_lhv_to_actual_hhv__J0']
    *
    df_prep['gas_pressure_factor_ref_to_actual__J0'] 
    *
    df_prep['gas_temp_factor_ref_to_actual__J0']
)

df_prep.loc[:,'calculated_remeha_g_use_hhv__W'] = (
    df_prep['calculated_remeha_g_use_ch_hhv__W'] 
    + 
    df_prep['calculated_remeha_g_use_dhw_hhv__W']
)

In [None]:
# Split the fan-based boiler gas power by purpose: multiplying by the burning-status
# booleans keeps the value where the status is True and yields 0 where it is False
df_prep.loc[:,'calculated_g_use_ch_hhv__W'] = (
    df_prep['calculated_g_use_boiler_hhv__W']
    .mul(df_prep['batch_import_remeha_boiler_status_burning_ch__bool'])
)
df_prep.loc[:,'calculated_g_use_dhw_hhv__W'] = (
    df_prep['calculated_g_use_boiler_hhv__W']
    .mul(df_prep['batch_import_remeha_boiler_status_burning_dhw__bool'])
)

In [None]:
# Convert all '__bool' columns to float in one go, then show describe() statistics
# (without the count column) formatted as percentages
df_bools_to_float = df_prep[
    [name for name in df_prep.columns if name.endswith('__bool')]
].astype('float')
df_bools_to_float.describe().T.drop(columns='count').style.format("{:.2%}")

### Calculate actual gas input power for all purposes based on smart meter data

In [None]:
%%time
# Smart meters measure and correct for temperature; correct for actual air pressure and  actual calorific value is not yet done; we do it here.


# Calorific value conversion factor from Groningen gas gas to the actual gas used 
df_prep.loc[:,'gas_calorific_factor_groningen_hhv_to_actual_hhv__J0'] = df_prep['gas_std_hhv__J_m_3'] / gas_groningen_nl_avg_std_hhv__J_m_3

# Pressure conversion factor to correct pressure conversion by smart meter (assumed: P_nl_avg__Pa) using actual pressure (KNMI), including constant overpressure
df_prep.loc[:,'gas_pressure_factor_correct_smart_meter_to_actual__J0'] = (
    (P_nl_avg__Pa + overpressure_gas_nl_avg__Pa)
    /
    (df_prep['batch_import_KNMI_air__Pa'] + overpressure_gas_nl_avg__Pa)
)

df_prep.loc[:,'calculated_g_use_hhv__W'] = (
    df_prep['device_p1-reader_g_use_hhv__W']
    * df_prep['gas_calorific_factor_groningen_hhv_to_actual_hhv__J0']     # calorific conversion factor
    * df_prep['gas_pressure_factor_correct_smart_meter_to_actual__J0']    # pressure conversion factor
)

## Use boiler-specific efficiency to calculate Q_gain_ch__W

### Lookup of momentary boiler efficiency from boiler-specific efficiency curves

In [None]:
%%time
# Round return temperatures to whole degrees; this column is one of the merge keys for the efficiency lookup below.
# Int8 holds -128..127; assumes return temperatures stay within that range — TODO confirm
df_prep['rounded_temp_ret__degC'] = df_prep['batch_import_remeha_temp_ret__degC'].round().astype('Int8')

In [None]:
%%time
# Boiler load as a whole percentage of Qnh_max_lhv__kW: the interpolated input power
# (fan_frac * Qnh_max + (1 - fan_frac) * Qnh_min) divided by Qnh_max, times 100.
# This column is one of the merge keys for the efficiency lookup below.
df_prep.loc[:,'rounded_load__%'] = (
    (df_prep['calculated_fan_frac__0'] + (1 - df_prep['calculated_fan_frac__0']) * df_prep['Qnh_min_lhv__kW'] / df_prep['Qnh_max_lhv__kW'])
    * 100
    * df_prep['batch_import_remeha_gas_valve_open__bool'] # make sure load = 0 when gas valve is closed
).round().astype('Int16')

### Reading boiler efficiency data

In [None]:
%%time
# Attempt to read the Parquet file
try:
    df_boiler_efficiency = pd.read_parquet(
        # boiler_returntemp_efficiency_file_path, 
        boiler_returntemp_load_efficiency_file_path, 
        engine='pyarrow',
        dtype_backend='numpy_nullable'
        )
    print("File was successfully read without specifying compression codec.")
except Exception as e:
    print(f"Error reading file: {e}")

In [None]:
%%time
# Left-join df_prep with the boiler efficiency table on boiler model, rounded load and
# rounded return temperature; rows without a match keep NA in the added columns.
# Afterwards the (id, timestamp) index is restored and sorted.
# NOTE(review): presumably this adds the eta_ch_hhv__W0 column used below — confirm against the parquet file.
# NOTE(review): not idempotent; re-running merges the efficiency columns a second time.
df_prep = (df_prep
           .reset_index()
           .merge(df_boiler_efficiency.reset_index(),
                  on=['brand_model', 'rounded_load__%', 'rounded_temp_ret__degC'], 
                  how='left'
                  )
           .set_index(['id', 'timestamp'])
           .sort_index()
          )  


### Calculate Q_gain_ch__W

In [None]:
%%time
df_prep.loc[:,'calculated_Q_gain_ch__W'] = df_prep['calculated_g_use_ch_hhv__W'] * df_prep['eta_ch_hhv__W0']

In [None]:
# All column names of df_prep, in alphabetical order
sorted(df_prep.columns)

## Inspect results 

In [None]:
# Row masks for the inspection cells below
# burning for central heating or domestic hot water, with the gas valve open
boiler_burning_mask = (
    (
        df_prep['batch_import_remeha_boiler_status_burning_ch__bool'].eq(True)
        | df_prep['batch_import_remeha_boiler_status_burning_dhw__bool'].eq(True)
    )
    & df_prep['batch_import_remeha_gas_valve_open__bool'].eq(True)
)
# burning for central heating, with the gas valve open
boiler_ch_mask = (
    df_prep['batch_import_remeha_boiler_status_burning_ch__bool'].eq(True)
    & df_prep['batch_import_remeha_gas_valve_open__bool'].eq(True)
)
# burning for domestic hot water, with the gas valve open
boiler_dhw_mask = (
    df_prep['batch_import_remeha_boiler_status_burning_dhw__bool'].eq(True)
    & df_prep['batch_import_remeha_gas_valve_open__bool'].eq(True)
)
# rows for which the Remeha indoor temperature is available
remeha_data_notna_mask = df_prep['batch_import_remeha_temp_in__degC'].notna()
# gas valve reported as closed
boiler_valve_closed_mask = df_prep['batch_import_remeha_gas_valve_closed__bool'].eq(True)

In [None]:
# Select the mask that the inspection cells below apply as boiler_status_mask
# (currently: all rows with Remeha indoor temperature data)
boiler_status_mask = remeha_data_notna_mask 


### Inspecting gas power used by boiler based on fan speed [rpm]

In [None]:
# Mean fan fraction over the selected rows
df_prep.loc[boiler_status_mask, 'calculated_fan_frac__0'].mean()

In [None]:
# Mean G25.3 gas input power (lhv) over the selected rows
df_prep.loc[boiler_status_mask, 'calculated_g25_3_use_boiler_lhv__W'].mean()

In [None]:
# Mean calorific conversion factor (G25.3 lhv to actual hhv) over the selected rows
df_prep.loc[boiler_status_mask, 'gas_calorific_factor_g25_3_lhv_to_actual_hhv__J0'].mean()

In [None]:
# Mean pressure conversion factor (reference to actual) over the selected rows
df_prep.loc[boiler_status_mask, 'gas_pressure_factor_ref_to_actual__J0'].mean()

In [None]:
# Mean temperature conversion factor (reference to actual) over the selected rows
df_prep.loc[boiler_status_mask, 'gas_temp_factor_ref_to_actual__J0'].mean()

In [None]:
# Overall boiler conversion factor: product of the mean calorific, pressure and temperature factors
conversion_factor_boiler = (
    df_prep.loc[boiler_status_mask, 'gas_calorific_factor_g25_3_lhv_to_actual_hhv__J0'].mean()
    * df_prep.loc[boiler_status_mask, 'gas_pressure_factor_ref_to_actual__J0'].mean()
    * df_prep.loc[boiler_status_mask, 'gas_temp_factor_ref_to_actual__J0'].mean()
)
conversion_factor_boiler

In [None]:
# Mean fan-based boiler gas input power (hhv) over the selected rows; reused in the comparison cells below
average_gas_boiler__W = df_prep.loc[boiler_status_mask, 'calculated_g_use_boiler_hhv__W'].mean()
average_gas_boiler__W

### Inspecting gas power used by the home, based on smart meter measurements

In [None]:
# Mean uncorrected smart meter gas power over the selected rows
df_prep.loc[boiler_status_mask, 'device_p1-reader_g_use_hhv__W'].mean()

In [None]:
# Mean calorific conversion factor (Groningen hhv to actual hhv) over the selected rows
df_prep.loc[boiler_status_mask, 'gas_calorific_factor_groningen_hhv_to_actual_hhv__J0'].mean()

In [None]:
# Mean smart meter pressure correction factor over the selected rows
df_prep.loc[boiler_status_mask, 'gas_pressure_factor_correct_smart_meter_to_actual__J0'].mean()

In [None]:
# Overall smart meter conversion factor: product of the mean calorific and pressure factors
conversion_factor_smart_meter = (
    df_prep.loc[boiler_status_mask, 'gas_calorific_factor_groningen_hhv_to_actual_hhv__J0'].mean()
    * df_prep.loc[boiler_status_mask, 'gas_pressure_factor_correct_smart_meter_to_actual__J0'].mean()
)
conversion_factor_smart_meter

In [None]:
# Mean corrected smart meter gas power over the selected rows
df_prep.loc[boiler_status_mask, 'calculated_g_use_hhv__W'].mean()

### Inspecting gas power used by the boiler, based on boiler counters

In [None]:
# Mean Remeha gas use for central heating (lhv) over the selected rows
average_gas_input_bdr_energy_counter_ch_lhv__W = (
    df_prep.loc[boiler_status_mask, 'batch_import_remeha_g_use_ch_lhv__W'].mean()
)
average_gas_input_bdr_energy_counter_ch_lhv__W

In [None]:
# Mean Remeha gas use for central heating after conversion to hhv
df_prep.loc[boiler_status_mask, 'calculated_remeha_g_use_ch_hhv__W'].mean()

In [None]:
# Mean Remeha gas use for domestic hot water (lhv) over the selected rows
average_gas_input_bdr_energy_counter_dhw_lhv__W = (
    df_prep.loc[boiler_status_mask, 'batch_import_remeha_g_use_dhw_lhv__W'].mean()
)
average_gas_input_bdr_energy_counter_dhw_lhv__W

In [None]:
# Mean Remeha gas use for domestic hot water after conversion to hhv
df_prep.loc[boiler_status_mask, 'calculated_remeha_g_use_dhw_hhv__W'].mean()

In [None]:
# Total mean Remeha gas use (lhv): central heating plus domestic hot water
average_gas_input_bdr_energy_counter_lhv__W = (average_gas_input_bdr_energy_counter_ch_lhv__W +  average_gas_input_bdr_energy_counter_dhw_lhv__W) 
average_gas_input_bdr_energy_counter_lhv__W

In [None]:
# Total mean Remeha gas use (hhv): central heating plus domestic hot water
(
    df_prep.loc[boiler_status_mask, 'calculated_remeha_g_use_ch_hhv__W'].mean()
    + df_prep.loc[boiler_status_mask, 'calculated_remeha_g_use_dhw_hhv__W'].mean()
)

### Compare gas power calculated in various ways

In [None]:
# Ratio: fan-based boiler gas power / corrected smart meter gas power
average_gas_boiler__W / df_prep.loc[boiler_status_mask, 'calculated_g_use_hhv__W'].mean()

In [None]:
# Ratio: fan-based boiler gas power / Remeha gas use (central heating + domestic hot water, hhv)
average_gas_boiler__W / (
    df_prep.loc[boiler_status_mask, 'calculated_remeha_g_use_ch_hhv__W'].mean()
    + df_prep.loc[boiler_status_mask, 'calculated_remeha_g_use_dhw_hhv__W'].mean()
)

In [None]:
# Mean of each gas / heat power property per id over the selected rows, transposed so properties are rows
df_prep.loc[boiler_status_mask].groupby(level='id').agg({
    prop: ['mean']
    for prop in (
        'calculated_remeha_g_use_ch_hhv__W',
        'calculated_remeha_g_use_dhw_hhv__W',
        'calculated_remeha_g_use_hhv__W',
        'calculated_g25_3_use_boiler_lhv__W',
        'calculated_g_use_ch_hhv__W',
        'calculated_g_use_dhw_hhv__W',
        'calculated_g_use_boiler_hhv__W',
        'calculated_g_use_hhv__W',
        'calculated_Q_gain_ch__W',
    )
}).T

In [None]:
# Per id: mean temperatures, load and efficiency over the selected rows, highest mean efficiency first
df_prep.loc[boiler_status_mask].groupby(level='id').agg(
    dict.fromkeys(
        [
            'batch_import_remeha_temp_ch_sup_max__degC',
            'batch_import_remeha_temp_sup__degC',
            'batch_import_remeha_temp_ret__degC',
            'rounded_temp_ret__degC',
            'rounded_load__%',
            'eta_ch_hhv__W0',
        ],
        'mean',
    )
).sort_values(by='eta_ch_hhv__W0', ascending=False)

In [None]:
# Mean rounded return temperature while burning for central heating
df_prep.loc[boiler_ch_mask, 'rounded_temp_ret__degC'].mean()

In [None]:
# Fraction of central heating rows whose rounded return temperature is below 30
(
    df_prep.loc[boiler_ch_mask & (df_prep['rounded_temp_ret__degC'] < 30), 'rounded_temp_ret__degC'].count()
    / df_prep.loc[boiler_ch_mask, 'rounded_temp_ret__degC'].count()
)

In [None]:
# Unweighted mean efficiency while burning for central heating
df_prep.loc[boiler_ch_mask, 'eta_ch_hhv__W0'].mean()

In [None]:
# Efficiency while burning for central heating, weighted by gas input power
# NOTE(review): the weights use calculated_remeha_g_use_hhv__W (central heating + domestic hot water);
# check whether calculated_remeha_g_use_ch_hhv__W was intended for a central heating efficiency.
(df_prep.loc[boiler_ch_mask, 'eta_ch_hhv__W0'] * df_prep.loc[boiler_ch_mask, 'calculated_remeha_g_use_hhv__W']).sum() / df_prep.loc[boiler_ch_mask, 'calculated_remeha_g_use_hhv__W'].sum()


In [None]:
# Mean rounded boiler load while burning for central heating
df_prep.loc[boiler_ch_mask, 'rounded_load__%'].mean()

In [None]:
# Display the boiler efficiency table that was merged into df_prep above
df_boiler_efficiency

### Boxplots per home (when boiler is burning for central heating & gas valve is open)

In [None]:
# Per id, while burning for central heating: mean temperatures, load and efficiency,
# plus the maximum heat gain; highest mean efficiency first
df_prep.loc[boiler_ch_mask].groupby(level='id').agg({
    **dict.fromkeys(
        [
            'batch_import_remeha_temp_ch_sup_max__degC',
            'batch_import_remeha_temp_sup__degC',
            'batch_import_remeha_temp_ret__degC',
            'rounded_temp_ret__degC',
            'rounded_load__%',
            'eta_ch_hhv__W0',
        ],
        'mean',
    ),
    'calculated_Q_gain_ch__W': 'max',
}).sort_values(by='eta_ch_hhv__W0', ascending=False)

In [None]:
# Boxplot per id of the rounded return temperature, central heating rows only
Plot.nfh_property_per_id_boxplot(
    df_prep.loc[boiler_ch_mask],
    property_col='rounded_temp_ret__degC',
)

In [None]:
# Boxplot per id of the rounded boiler load, central heating rows only
Plot.nfh_property_per_id_boxplot(
    df_prep.loc[boiler_ch_mask],
    property_col='rounded_load__%',
)

In [None]:
# Boxplot per id of the looked-up boiler efficiency, central heating rows only
Plot.nfh_property_per_id_boxplot(
    df_prep.loc[boiler_ch_mask],
    property_col='eta_ch_hhv__W0',
)

In [None]:
# Boxplot per id of the calculated central heating heat gain, central heating rows only
Plot.nfh_property_per_id_boxplot(
    df_prep.loc[boiler_ch_mask],
    property_col='calculated_Q_gain_ch__W',
)

## Write heat distribution results

### Writing heat distribution preprocessing results to parquet file

In [None]:
# Select the columns that are written to the heat distribution output files below
# (source measurements plus the calculated gas power, efficiency and heat gain columns)
df_heat_dist = df_prep[['batch_import_KNMI_ghi__W_m_2',
                        'batch_import_KNMI_temp_out__degC',
                        'batch_import_KNMI_wind__m_s_1',
                        'device_p1-reader_g_use_hhv__W',
                        'batch_import_remeha_boiler_status_burning_ch__bool',
                        'batch_import_remeha_boiler_status_burning_dhw__bool', 
                        'batch_import_remeha_gas_valve_closed__bool',
                        'batch_import_remeha_gas_valve_open__bool',
                        'batch_import_remeha_fan_rotations__min_1', 
                        'batch_import_remeha_g_use_ch_lhv__W',
                        'batch_import_remeha_g_use_dhw_lhv__W',
                        'batch_import_remeha_temp_set__degC',
                        'batch_import_remeha_temp_in__degC',
                        'device_living_room_calibrated_temp_in__degC',
                        'batch_import_remeha_temp_sup__degC',
                        'batch_import_remeha_temp_ret__degC',
                        'batch_import_remeha_temp_ch_sup_max__degC', 
                        'calculated_g_use_hhv__W',
                        'calculated_g_use_boiler_hhv__W',
                        'calculated_g_use_dhw_hhv__W',
                        'calculated_g_use_ch_hhv__W',
                        'eta_ch_hhv__W0',
                        'calculated_Q_gain_ch__W',
                        'rounded_load__%',
                        'rounded_temp_ret__degC',
                       ]]

In [None]:
# Concise summary of the selected output columns (dtypes, memory usage)
df_heat_dist.info()

In [None]:
# Sorted list of the ids present in the output data
sorted(df_heat_dist.index.get_level_values('id').unique())

In [None]:
%%time 
# Write the heat distribution results, including the index, to a single parquet file
df_heat_dist.to_parquet(rhc_heat_dist_preprocessed_poperties_file, index=True, engine='pyarrow')

### Writing heat distribution preprocessing results to multiple zipped CSV files

In [None]:
%%time 
# Write one zipped CSV file per id; each zip archive contains a single CSV named after the id.
# The two commented-out loop headers are alternatives that process only the first three ids,
# or all ids except the first three.
# for home_id in tqdm(df_heat_dist.index.get_level_values('id').unique()[:3]):
# for home_id in tqdm(df_heat_dist.index.get_level_values('id').unique()[3:]):
for home_id in tqdm(df_heat_dist.index.get_level_values('id').unique()):
    # drop_level=False keeps the id index level, so it is written to the CSV as well
    df_heat_dist.xs(home_id, drop_level=False).to_csv(
        f'{home_id}_heat_dist_preprocessed_properties.zip',
        encoding='utf-8',
        compression= dict(method='zip',
                          archive_name=f'{home_id}_heat_dist_preprocessed_properties.csv'),
        date_format='%Y-%m-%dT%H:%M:%S%z'
    )


## Learn energy profile parameters

Most of the heavy lifting is done by the `learn_energy_profile()` function, which again uses the [GEKKO Python](https://machinelearning.byu.edu/) dynamic optimization toolkit.

In [None]:
# Inputs for Learner.learn_energy_profile() in the next cell: hints, the list of parameters to learn,
# and the mapping from model property names to df_prep column names.
# NOTE(review): the original comment here read "set room metadata to None, then learn_home_parameters()
# will derive the metadata from the ids"; this cell does not set any room metadata, so that remark looks stale.

hints = {
    'A_sol__m2': A_sol_nl_avg__m2,                      # initial estimate for apparent solar aperture
    'eta_ch_hhv__W0' : eta_ch_nl_avg_hhv__W0,           # average home heating efficiency of a gas boiler (based on higher heating value)
    'eta_not_ch_hhv__W0' : eta_not_ch_nl_avg_hhv__W0,   # average home heating efficiency indirectly via DHW & cooking (based on higher heating value)  
    'g_not_ch_hhv__W' : g_not_ch_nl_avg_hhv__W,         # average gas power (heating value) for other purposes than home heating [W] 
    'occupancy__p' : occupancy_nl_avg__p,               # average house occupancy (1.5: 2.2 persons, at home for 16.3 of 24 hours)
    'Q_gain_int__W_p_1' : Q_gain_int_nl_avg__W_p_1,     # average heat gain per occupant (77 W for 8.6 hours, 105 W for 7.7 hours)
    'wind_chill__K_s_m_1' : wind_chill_nl_avg__K_s_m_1, # average for Dutch homes, according to KNMI: https://cdn.knmi.nl/knmi/pdf/bibliotheek/knmipubmetnummer/knmipub219.pdf 
    'A_inf__m2': A_inf_nl_avg__m2,                       # average effective infiltration area corresponding to wind_chill_nl_avg__K_s_m_1
    'H_nl_avg__W_K_1' : H_nl_avg__W_K_1                 # average specific heat loss for a Dutch home
}

# parameters to learn (all other parameters presumably fall back to the hints — TODO confirm in rhc_analysis)
learn = ['A_sol__m2']

# define and select column names: model property name -> df_prep column name
property_sources = {
    'temp_in__degC' : 'batch_import_remeha_temp_in__degC',
    'temp_out__degC' : 'batch_import_KNMI_temp_out__degC',
    'wind__m_s_1' : 'batch_import_KNMI_wind__m_s_1',
    'ghi__W_m_2' : 'batch_import_KNMI_ghi__W_m_2', 
    'g_use_hhv__W' : 'device_p1-reader_g_use_hhv__W',
    'e_use__W' : 'calculated_device_p1-reader_e_use__W',
    'e_ret__W' : 'calculated_device_p1-reader_e_ret__W',
    'occupancy__p': 'device_living_room_occupancy__p' 
}

In [None]:
# learn the model parameters and write results to two dataframes:
# df_results_per_period (one result per learning period) and df_results
# (presumably the time series results — TODO confirm in rhc_analysis)
%autoreload 2
df_results_per_period, df_results = Learner.learn_energy_profile(df_prep, 
                                                                  property_sources = property_sources, 
                                                                  learn = learn, 
                                                                  hints = hints,
                                                                  ev_type = 2
                                                                 )

### Result per learning period

In [None]:
# Display the learning results per period
df_results_per_period

In [None]:
# show essential statistics for the learned values:
# describe() restricted to columns starting with 'actual_' or 'learned_', showing only the mean, std, min and max rows
df_stats = df_results_per_period.describe().filter(regex='^actual_|^learned_')
df_stats.loc[df_stats.index.get_level_values(0).isin(['mean', 'std', 'min', 'max'])]

In [None]:
# show essential statistics for the errors:
# describe() restricted to columns starting with 'mae_' or 'rmse', showing only the mean, std, min and max rows
df_stats = df_results_per_period.describe().filter(regex='^mae_|^rmse')
df_stats.loc[df_stats.index.get_level_values(0).isin(['mean', 'std', 'min', 'max'])]

In [None]:
# show essential statistics for the error values, per id:
# describe() per id, stacked so that the statistic name becomes the second index level,
# restricted to columns starting with 'mae_' or 'rmse'; only the mean, std, min and max rows are shown
df_stats = df_results_per_period.groupby('id').describe().stack().filter(regex='^mae_|^rmse')
df_stats.loc[df_stats.index.get_level_values(1).isin(['mean', 'std', 'min', 'max'])]

### Result Visualization

In [None]:
# Display the second result frame returned by Learner.learn_energy_profile()
df_results

In [None]:
# Keep only the columns whose unit suffix (the part after the last '__') is degC
df_plot = df_prep[
    [column_name for column_name in df_prep.columns if column_name.split('__')[-1] == 'degC']
]

In [None]:
# Plot only temperatures from all sources for all ids (df_plot was restricted to degC columns in the previous cell)
Plot.dataframe_preprocessed_plot(df_plot, units_to_mathtext)

In [None]:
# Select all properties for plotting. The original bound the misspelled name df_lot,
# which left df_plot pointing at the temperature-only selection, so the
# "plot all properties" cell below re-plotted temperatures only.
df_plot = df_prep

In [None]:
# Plot all properties from all sources for all ids
# (plots whatever frame df_plot is currently bound to)
Plot.dataframe_preprocessed_plot(df_plot, units_to_mathtext)