# REDUCEDHEATCARB measurements plotting

In [None]:
import pandas as pd
import numpy as np

# Display setting only: render DataFrames with two decimals; the stored
# values are not rounded.
pd.options.display.precision = 2

# Relative path of the Parquet file that the notebook reads below.
rhc_input_file_path = 'rhc_sane_meas.parquet'

import sys

# Put the sibling `data` and `view` directories on the module search path,
# in that order, so the local imports further down in this cell can resolve.
sys.path.extend(['../data/', '../view/'])

# Load IPython's autoreload extension.
# NOTE(review): no `%autoreload <mode>` line follows in this cell — confirm
# whether something like `%autoreload 2` was intended here.
%load_ext autoreload


# NOTE(review): two matplotlib backends are requested back to back;
# presumably only the later one (`widget`) stays active — confirm and drop
# the one that is not needed.
%matplotlib inline
%matplotlib widget
import pylab as plt
import itertools
from plotter import Plot
from tqdm.notebook import tqdm
from preprocessor import Preprocessor

In [None]:
# Lookup from unit key to a mathtext label. The keys appear to match the
# suffix after the double underscore in property names used in this notebook
# (e.g. 'temp_in__degC' -> 'degC', 'dsmr_version__0' -> '0').
# NOTE(review): `property_types` is bound to the very same dict object; it is
# kept so that any code relying on that name keeps working — confirm whether
# the alias is still needed.
units_to_mathtext = property_types = {
    'ppm': r'$ppm$',
    'kWh': r'$kWh$',
    'm3': r'$m^{3}$',
    'degC': r'$°C$',
    'W': r'$W$',
    'V': r'$V$',
    '0': r'$[-]$',
    'bool': r'$0 = False; 1 = True$',
    # Fixed: '\cdotm' is not a mathtext command (a space is required after
    # '\cdot'), and the key 'W_m_2' denotes watts per square metre, so the
    # exponent is -2 rather than -1.
    'W_m_2': r'$W\cdot m^{-2}$',
}

In [None]:
%%time
# Read the measurements from the Parquet file into `df`.
# On failure the error is still printed, but it is also re-raised so the
# notebook stops at this cell instead of failing later with a NameError on
# the never-assigned `df`.
try:
    df = pd.read_parquet(
        rhc_input_file_path,
        engine='pyarrow',
        # NOTE(review): `use_nullable_dtypes` is deprecated since pandas 2.0
        # in favour of `dtype_backend`; kept as-is because the pandas version
        # used with this notebook is not visible here.
        use_nullable_dtypes=True,
    )
except Exception as e:
    print(f"Error reading file: {e}")
    raise
else:
    # Only reached when the read succeeded.
    print("File was successfully read without specifying compression codec.")


In [None]:
# Print a structural summary of the loaded frame (index, columns, dtypes,
# non-null counts, memory usage).
df.info()

In [None]:
# Distinct values of the 'id' index level as loaded, i.e. before the
# de-duplication and reordering done further down.
df.index.unique(level='id').values

In [None]:
# Show the loaded frame as this cell's output.
df

## Drop duplicates and sort most complete ids first

In [None]:
# Ids in the order they should appear after reindexing; per the section
# heading, the most complete ids come first.
ids_ordered = [
    424197, 494233, 404873, 458000, 434931,
    450298, 456638, 495906, 444964, 430062,
    478667, 487126, 401632, 483173, 412715,
    458852, 403603, 429011, 449134, 410260,
]

In [None]:
# Keep only the first row for every repeated index label, then reindex df on
# the 'id' index level using the order listed in ids_ordered.
df = (
    df.loc[~df.index.duplicated(keep='first')]
    .reindex(ids_ordered, level='id')
)

In [None]:
# Distinct 'id' values again, now after dropping duplicate index labels and
# reindexing by ids_ordered in the cell above.
df.index.unique(level='id').values

In [None]:
# How many ids are present, and which ones.
print('#ids: ', len(df.index.unique(level='id')))
print('ids: ', list(df.index.unique(level='id')))

# Distinct values found in the remaining index levels.
print('source categories: ', list(df.index.unique(level='source_category')))
print('source types: ', list(df.index.unique(level='source_type')))
print('properties: ', list(df.index.unique(level='property')))

# Non-null entries in the 'value' column, followed by row counts per source
# type and per (source category, source type, property) combination.
print('values: ', df['value'].count())
print('#values per: \n', df.groupby(['source_type']).size())
print(
    '#values per: \n',
    df.groupby(['source_category', 'source_type', 'property']).size(),
)

## Optional: remap to data science pseudonyms 

In [None]:
# mapping_df = pd.read_excel('pseudonym_id_student_mapping.xlsx')
# id_mapping_dict = dict(zip(mapping_df['id'], mapping_df['random_id']))
# df.index = df.index.set_levels(df.index.levels[0].map(id_mapping_dict).astype(df.index.levels[0].dtype), level=0)


## Plot data


In [None]:
# Plot property temp_in__degC for id 403603.
Plot.nfh_measurements_plot(
    df,
    properties=['temp_in__degC'],
    ids=[403603],
)


In [None]:
# Plot the device-category e_use_hi_cum__kWh series for id 404873.
Plot.nfh_measurements_plot(
    df,
    source_categories=['device'],
    properties=['e_use_hi_cum__kWh'],
    ids=[404873],
)


In [None]:
# Plot property dsmr_version__0 for id 404873.
Plot.nfh_measurements_plot(
    df,
    properties=['dsmr_version__0'],
    ids=[404873],
)


In [None]:
# Plot the device-category e_use_hi_cum__kWh series for id 404873.
# NOTE(review): an earlier cell in this notebook makes the same call with the
# same arguments — drop one of the two if the repetition is unintentional.
Plot.nfh_measurements_plot(
    df,
    source_categories=['device'],
    properties=['e_use_hi_cum__kWh'],
    ids=[404873],
)


In [None]:
# Plot.nfh_measurements_plot(df, properties = ['dsmr_version__0'])


In [None]:
# Plot the four cumulative e_use/e_ret (hi and lo) kWh properties from the
# device category for id 404873.
Plot.nfh_measurements_plot(
    df,
    source_categories=['device'],
    properties=[
        'e_use_hi_cum__kWh',
        'e_use_lo_cum__kWh',
        'e_ret_hi_cum__kWh',
        'e_ret_lo_cum__kWh',
    ],
    ids=[404873],
)

In [None]:
# Plot the device-category g_use_cum__m3 series for id 404873.
Plot.nfh_measurements_plot(
    df,
    source_categories=['device'],
    properties=['g_use_cum__m3'],
    ids=[404873],
)

In [None]:
# Plot.nfh_measurements_plot(df, source_categories = ['device'], properties = ['e_use_hi_cum__kWh', 'e_use_lo_cum__kWh', 'e_ret_hi_cum__kWh', 'e_ret_lo_cum__kWh'])

In [None]:
# Plot.nfh_measurements_plot(df, properties = ['temp_in__degC'])

In [None]:
# Plot temp_sup__degC together with temp_ret__degC for id 403603.
Plot.nfh_measurements_plot(
    df,
    properties=['temp_sup__degC', 'temp_ret__degC'],
    ids=[403603],
)

In [None]:
# Plot.nfh_measurements_plot(df, properties = ['temp_sup__degC', 'temp_ret__degC'])

In [None]:
# Plot battery_voltage__V and co2__ppm for ids 403603 and 449134.
Plot.nfh_measurements_plot(
    df,
    properties=['battery_voltage__V', 'co2__ppm'],
    ids=[403603, 449134],
)

In [None]:
# Plot.nfh_measurements_plot(df, properties = ['battery_voltage__V', 'co2__ppm'])

In [None]:
# Plot everything with source type 'enelogic' for id 412715.
Plot.nfh_measurements_plot(
    df,
    source_types=['enelogic'],
    ids=[412715],
)
# Plot.nfh_measurements_plot(df, source_types = ['enelogic'], ids = [404873])

# Plot.nfh_measurements_plot(df, source_types = ['enelogic'])


In [None]:
# Plot.nfh_measurements_plot(df, source_types = ['twomes-p1-reader-firmware'], ids = [412715])
# Plot everything with source type 'twomes-p1-reader-firmware' for id 404873.
Plot.nfh_measurements_plot(
    df,
    source_types=['twomes-p1-reader-firmware'],
    ids=[404873],
)
# Plot.nfh_measurements_plot(df, source_types = ['twomes-p1-reader-firmware'])


In [None]:
# Plot the kWh-unit measurements from both the 'enelogic' and the
# 'twomes-p1-reader-firmware' source types for id 412715.
Plot.nfh_measurements_plot(
    df,
    source_types=['enelogic', 'twomes-p1-reader-firmware'],
    units=['kWh'],
    ids=[412715],
)
# Plot.nfh_measurements_plot(df, source_types = ['enelogic', 'twomes-p1-reader-firmware'], ids = [404873])
# Plot.nfh_measurements_plot(df, source_types = ['enelogic', 'twomes-p1-reader-firmware'])

In [None]:
# Plot everything with source type
# 'twomes-co2-occupancy-scd41-m5coreink-firmware' for id 412715.
Plot.nfh_measurements_plot(
    df,
    source_types=['twomes-co2-occupancy-scd41-m5coreink-firmware'],
    ids=[412715],
)
# Plot.nfh_measurements_plot(df, source_types = ['twomes-co2-occupancy-scd41-m5coreink-firmware'], ids = [404873])
# Plot.nfh_measurements_plot(df, source_types = ['twomes-co2-occupancy-scd41-m5coreink-firmware'])

In [None]:
# Plot four selected properties of source type
# 'twomes-co2-occupancy-scd41-m5coreink-firmware' for id 456638.
Plot.nfh_measurements_plot(
    df,
    source_types=['twomes-co2-occupancy-scd41-m5coreink-firmware'],
    properties=[
        'co2__ppm',
        'onboarded__p',
        'occupancy__p',
        'temp_in__degC',
    ],
    ids=[456638],
)
# Plot.nfh_measurements_plot(df, source_types = ['twomes-co2-occupancy-scd41-m5coreink-firmware'], ids = [404873])
# Plot.nfh_measurements_plot(df, source_types = ['twomes-co2-occupancy-scd41-m5coreink-firmware'])

In [None]:
# Plot.nfh_measurements_plot(df, properties=['temp_in__degC'])

In [None]:
# Plot.nfh_measurements_plot(df, properties=['co2__ppm'])


In [None]:
# Plot id 412715 with no other filter arguments given.
Plot.nfh_measurements_plot(
    df,
    ids=[412715],
)
# Plot.nfh_measurements_plot(df, ids = [404873])
# Plot.nfh_measurements_plot(df)